Diffstat (mode, path, lines changed):
-rw-r--r--  .github/ISSUE_TEMPLATE/1_broken_site.md | 6
-rw-r--r--  .github/ISSUE_TEMPLATE/2_site_support_request.md | 4
-rw-r--r--  .github/ISSUE_TEMPLATE/3_site_feature_request.md | 4
-rw-r--r--  .github/ISSUE_TEMPLATE/4_bug_report.md | 6
-rw-r--r--  .github/ISSUE_TEMPLATE/5_feature_request.md | 4
-rw-r--r--  .github/workflows/python-publish.yml | 33
-rw-r--r--  .gitignore | 9
-rw-r--r--  .travis.yml | 17
-rw-r--r--  CONTRIBUTING.md | 2
-rw-r--r--  ChangeLog | 592
-rw-r--r--  MANIFEST.in | 6
-rw-r--r--  Makefile | 84
-rw-r--r--  README.md | 1098
-rwxr-xr-x  bin/youtube-dl | 6
-rw-r--r--  devscripts/bash-completion.in | 4
-rwxr-xr-x  devscripts/bash-completion.py | 6
-rw-r--r--  devscripts/buildserver.py | 4
-rw-r--r--  devscripts/check-porn.py | 4
-rw-r--r--  devscripts/create-github-release.py | 24
-rw-r--r--  devscripts/fish-completion.in | 2
-rwxr-xr-x  devscripts/fish-completion.py | 10
-rw-r--r--  devscripts/generate_aes_testdata.py | 4
-rwxr-xr-x  devscripts/gh-pages/add-version.py | 6
-rwxr-xr-x  devscripts/gh-pages/update-feed.py | 10
-rwxr-xr-x  devscripts/gh-pages/update-sites.py | 6
-rwxr-xr-x  devscripts/make_contributing.py | 20
-rw-r--r--  devscripts/make_issue_template.py | 6
-rw-r--r--  devscripts/make_lazy_extractors.py | 4
-rwxr-xr-x  devscripts/make_readme.py | 4
-rw-r--r--  devscripts/make_supportedsites.py | 6
-rw-r--r--  devscripts/prepare_manpage.py | 6
-rwxr-xr-x  devscripts/release.sh | 22
-rw-r--r--  devscripts/show-downloads-statistics.py | 10
-rw-r--r--  devscripts/zsh-completion.in | 6
-rwxr-xr-x  devscripts/zsh-completion.py | 6
-rw-r--r--  docs/Makefile | 8
-rw-r--r--  docs/conf.py | 10
-rw-r--r--  docs/index.rst | 6
-rw-r--r--  docs/module_guide.rst | 8
-rw-r--r--  docs/supportedsites.md | 89
-rw-r--r--  make_win.bat | 1
-rw-r--r--  setup.cfg | 2
-rw-r--r--  setup.py | 149
-rw-r--r--  test/helper.py | 10
-rw-r--r--  test/test_InfoExtractor.py | 8
-rw-r--r--  test/test_YoutubeDL.py | 40
-rw-r--r--  test/test_YoutubeDLCookieJar.py | 9
-rw-r--r--  test/test_aes.py | 4
-rw-r--r--  test/test_age_restriction.py | 2
-rw-r--r--  test/test_all_urls.py | 10
-rw-r--r--  test/test_cache.py | 2
-rw-r--r--  test/test_compat.py | 14
-rw-r--r--  test/test_download.py | 12
-rw-r--r--  test/test_downloader_http.py | 8
-rw-r--r--  test/test_execution.py | 10
-rw-r--r--  test/test_http.py | 4
-rw-r--r--  test/test_iqiyi_sdk_interpreter.py | 2
-rw-r--r--  test/test_jsinterp.py | 2
-rw-r--r--  test/test_netrc.py | 2
-rw-r--r--  test/test_options.py | 2
-rw-r--r--  test/test_postprocessors.py | 2
-rw-r--r--  test/test_socks.py | 2
-rw-r--r--  test/test_subtitles.py | 15
-rw-r--r--  test/test_swfinterp.py | 2
-rw-r--r--  test/test_update.py | 2
-rw-r--r--  test/test_utils.py | 40
-rw-r--r--  test/test_verbose_output.py | 8
-rw-r--r--  test/test_write_annotations.py | 8
-rw-r--r--  test/test_youtube_chapters.py | 4
-rw-r--r--  test/test_youtube_lists.py | 2
-rw-r--r--  test/test_youtube_signature.py | 26
-rw-r--r--  test/testdata/cookies/malformed_cookies.txt | 9
-rw-r--r--  tox.ini | 2
-rw-r--r--  youtube-dl.plugin.zsh | 24
-rwxr-xr-x  youtube_dl/YoutubeDL.py | 2417
-rw-r--r--  youtube_dl/__init__.py | 483
-rwxr-xr-x  youtube_dl/__main__.py | 19
-rw-r--r--  youtube_dl/cache.py | 96
-rw-r--r--  youtube_dl/compat.py | 3026
-rw-r--r--  youtube_dl/downloader/__init__.py | 61
-rw-r--r--  youtube_dl/downloader/common.py | 391
-rw-r--r--  youtube_dl/downloader/fragment.py | 269
-rw-r--r--  youtube_dl/downloader/hls.py | 210
-rw-r--r--  youtube_dl/downloader/http.py | 354
-rw-r--r--  youtube_dl/extractor/abc.py | 193
-rw-r--r--  youtube_dl/extractor/abcotvs.py | 112
-rw-r--r--  youtube_dl/extractor/addanime.py | 95
-rw-r--r--  youtube_dl/extractor/adobetv.py | 197
-rw-r--r--  youtube_dl/extractor/americastestkitchen.py | 92
-rw-r--r--  youtube_dl/extractor/appletrailers.py | 283
-rw-r--r--  youtube_dl/extractor/ard.py | 400
-rw-r--r--  youtube_dl/extractor/atresplayer.py | 202
-rw-r--r--  youtube_dl/extractor/audioboom.py | 69
-rw-r--r--  youtube_dl/extractor/azmedien.py | 86
-rw-r--r--  youtube_dl/extractor/bambuser.py | 142
-rw-r--r--  youtube_dl/extractor/bandcamp.py | 417
-rw-r--r--  youtube_dl/extractor/bbc.py | 1359
-rw-r--r--  youtube_dl/extractor/bellmedia.py | 83
-rw-r--r--  youtube_dl/extractor/bilibili.py | 421
-rw-r--r--  youtube_dl/extractor/biqle.py | 97
-rw-r--r--  youtube_dl/extractor/bitchute.py | 135
-rw-r--r--  youtube_dl/extractor/brightcove.py | 651
-rw-r--r--  youtube_dl/extractor/businessinsider.py | 42
-rw-r--r--  youtube_dl/extractor/canvas.py | 319
-rw-r--r--  youtube_dl/extractor/cbc.py | 457
-rw-r--r--  youtube_dl/extractor/ceskatelevize.py | 287
-rw-r--r--  youtube_dl/extractor/channel9.py | 262
-rw-r--r--  youtube_dl/extractor/chaturbate.py | 81
-rw-r--r--  youtube_dl/extractor/cloudflarestream.py | 63
-rw-r--r--  youtube_dl/extractor/comcarcoff.py | 74
-rw-r--r--  youtube_dl/extractor/common.py | 2974
-rw-r--r--  youtube_dl/extractor/commonmistakes.py | 50
-rw-r--r--  youtube_dl/extractor/corus.py | 105
-rw-r--r--  youtube_dl/extractor/crunchyroll.py | 680
-rw-r--r--  youtube_dl/extractor/dailymotion.py | 512
-rw-r--r--  youtube_dl/extractor/daisuki.py | 154
-rw-r--r--  youtube_dl/extractor/daum.py | 308
-rw-r--r--  youtube_dl/extractor/dctp.py | 115
-rw-r--r--  youtube_dl/extractor/discovery.py | 119
-rw-r--r--  youtube_dl/extractor/discoverynetworks.py | 65
-rw-r--r--  youtube_dl/extractor/dplay.py | 376
-rw-r--r--  youtube_dl/extractor/dreisat.py | 193
-rw-r--r--  youtube_dl/extractor/dropbox.py | 40
-rw-r--r--  youtube_dl/extractor/drtv.py | 305
-rw-r--r--  youtube_dl/extractor/dumpert.py | 69
-rw-r--r--  youtube_dl/extractor/eighttracks.py | 164
-rw-r--r--  youtube_dl/extractor/eporner.py | 130
-rw-r--r--  youtube_dl/extractor/extractors.py | 1520
-rw-r--r--  youtube_dl/extractor/facebook.py | 501
-rw-r--r--  youtube_dl/extractor/flipagram.py | 115
-rw-r--r--  youtube_dl/extractor/fox9.py | 42
-rw-r--r--  youtube_dl/extractor/franceculture.py | 63
-rw-r--r--  youtube_dl/extractor/francetv.py | 516
-rw-r--r--  youtube_dl/extractor/gameone.py | 134
-rw-r--r--  youtube_dl/extractor/generic.py | 3426
-rw-r--r--  youtube_dl/extractor/giantbomb.py | 87
-rw-r--r--  youtube_dl/extractor/globo.py | 234
-rw-r--r--  youtube_dl/extractor/go.py | 227
-rw-r--r--  youtube_dl/extractor/go90.py | 149
-rw-r--r--  youtube_dl/extractor/hark.py | 33
-rw-r--r--  youtube_dl/extractor/hellporno.py | 75
-rw-r--r--  youtube_dl/extractor/hotstar.py | 205
-rw-r--r--  youtube_dl/extractor/iconosquare.py | 85
-rw-r--r--  youtube_dl/extractor/imdb.py | 121
-rw-r--r--  youtube_dl/extractor/imgur.py | 154
-rw-r--r--  youtube_dl/extractor/indavideo.py | 128
-rw-r--r--  youtube_dl/extractor/internetvideoarchive.py | 100
-rw-r--r--  youtube_dl/extractor/iprima.py | 132
-rw-r--r--  youtube_dl/extractor/ivi.py | 220
-rw-r--r--  youtube_dl/extractor/jamendo.py | 150
-rw-r--r--  youtube_dl/extractor/joj.py | 108
-rw-r--r--  youtube_dl/extractor/jpopsukitv.py | 68
-rw-r--r--  youtube_dl/extractor/jwplatform.py | 41
-rw-r--r--  youtube_dl/extractor/kakao.py | 149
-rw-r--r--  youtube_dl/extractor/kaltura.py | 370
-rw-r--r--  youtube_dl/extractor/keek.py | 39
-rw-r--r--  youtube_dl/extractor/kontrtube.py | 73
-rw-r--r--  youtube_dl/extractor/la7.py | 67
-rw-r--r--  youtube_dl/extractor/learnr.py | 33
-rw-r--r--  youtube_dl/extractor/lecturio.py | 244
-rw-r--r--  youtube_dl/extractor/lego.py | 128
-rw-r--r--  youtube_dl/extractor/limelight.py | 377
-rw-r--r--  youtube_dl/extractor/linuxacademy.py | 174
-rw-r--r--  youtube_dl/extractor/lnkgo.py | 116
-rw-r--r--  youtube_dl/extractor/macgamestore.py | 42
-rw-r--r--  youtube_dl/extractor/mailru.py | 314
-rw-r--r--  youtube_dl/extractor/makertv.py | 32
-rw-r--r--  youtube_dl/extractor/malltv.py | 53
-rw-r--r--  youtube_dl/extractor/mangomolo.py | 53
-rw-r--r--  youtube_dl/extractor/mediaset.py | 163
-rw-r--r--  youtube_dl/extractor/mediasite.py | 366
-rw-r--r--  youtube_dl/extractor/minhateca.py | 70
-rw-r--r--  youtube_dl/extractor/mit.py | 156
-rw-r--r--  youtube_dl/extractor/mitele.py | 120
-rw-r--r--  youtube_dl/extractor/mixcloud.py | 398
-rw-r--r--  youtube_dl/extractor/mofosex.py | 56
-rw-r--r--  youtube_dl/extractor/motherless.py | 205
-rw-r--r--  youtube_dl/extractor/msn.py | 115
-rw-r--r--  youtube_dl/extractor/mtv.py | 490
-rw-r--r--  youtube_dl/extractor/musicplayon.py | 66
-rw-r--r--  youtube_dl/extractor/myspass.py | 73
-rw-r--r--  youtube_dl/extractor/naver.py | 128
-rw-r--r--  youtube_dl/extractor/nbc.py | 482
-rw-r--r--  youtube_dl/extractor/ndr.py | 389
-rw-r--r--  youtube_dl/extractor/nexx.py | 451
-rw-r--r--  youtube_dl/extractor/nhk.py | 96
-rw-r--r--  youtube_dl/extractor/nintendo.py | 46
-rw-r--r--  youtube_dl/extractor/nova.py | 256
-rw-r--r--  youtube_dl/extractor/nowness.py | 147
-rw-r--r--  youtube_dl/extractor/npr.py | 108
-rw-r--r--  youtube_dl/extractor/nrk.py | 677
-rw-r--r--  youtube_dl/extractor/nrl.py | 30
-rw-r--r--  youtube_dl/extractor/ntvru.py | 132
-rw-r--r--  youtube_dl/extractor/nytimes.py | 223
-rw-r--r--  youtube_dl/extractor/odnoklassniki.py | 259
-rw-r--r--  youtube_dl/extractor/onet.py | 250
-rw-r--r--  youtube_dl/extractor/onionstudios.py | 81
-rw-r--r--  youtube_dl/extractor/ooyala.py | 207
-rw-r--r--  youtube_dl/extractor/openload.py | 501
-rw-r--r--  youtube_dl/extractor/orf.py | 427
-rw-r--r--  youtube_dl/extractor/pandatv.py | 99
-rw-r--r--  youtube_dl/extractor/patreon.py | 136
-rw-r--r--  youtube_dl/extractor/peertube.py | 547
-rw-r--r--  youtube_dl/extractor/periscope.py | 171
-rw-r--r--  youtube_dl/extractor/phoenix.py | 45
-rw-r--r--  youtube_dl/extractor/platzi.py | 224
-rw-r--r--  youtube_dl/extractor/pokemon.py | 75
-rw-r--r--  youtube_dl/extractor/pornhd.py | 109
-rw-r--r--  youtube_dl/extractor/pornhub.py | 569
-rw-r--r--  youtube_dl/extractor/prosiebensat1.py | 500
-rw-r--r--  youtube_dl/extractor/puhutv.py | 247
-rw-r--r--  youtube_dl/extractor/redtube.py | 119
-rw-r--r--  youtube_dl/extractor/revision3.py | 170
-rw-r--r--  youtube_dl/extractor/roosterteeth.py | 148
-rw-r--r--  youtube_dl/extractor/safari.py | 264
-rw-r--r--  youtube_dl/extractor/scrippsnetworks.py | 104
-rw-r--r--  youtube_dl/extractor/seeker.py | 57
-rw-r--r--  youtube_dl/extractor/servingsys.py | 72
-rw-r--r--  youtube_dl/extractor/servus.py | 56
-rw-r--r--  youtube_dl/extractor/shared.py | 127
-rw-r--r--  youtube_dl/extractor/slideslive.py | 39
-rw-r--r--  youtube_dl/extractor/sohu.py | 202
-rw-r--r--  youtube_dl/extractor/soundcloud.py | 795
-rw-r--r--  youtube_dl/extractor/spankbang.py | 180
-rw-r--r--  youtube_dl/extractor/spankwire.py | 127
-rw-r--r--  youtube_dl/extractor/spike.py | 57
-rw-r--r--  youtube_dl/extractor/sportdeutschland.py | 100
-rw-r--r--  youtube_dl/extractor/srmediathek.py | 59
-rw-r--r--  youtube_dl/extractor/streamango.py | 128
-rw-r--r--  youtube_dl/extractor/streamcloud.py | 78
-rw-r--r--  youtube_dl/extractor/stretchinternet.py | 48
-rw-r--r--  youtube_dl/extractor/stv.py | 94
-rw-r--r--  youtube_dl/extractor/svt.py | 371
-rw-r--r--  youtube_dl/extractor/teachable.py | 266
-rw-r--r--  youtube_dl/extractor/teachingchannel.py | 35
-rw-r--r--  youtube_dl/extractor/teamcoco.py | 199
-rw-r--r--  youtube_dl/extractor/tele5.py | 57
-rw-r--r--  youtube_dl/extractor/telecinco.py | 156
-rw-r--r--  youtube_dl/extractor/telegraaf.py | 78
-rw-r--r--  youtube_dl/extractor/telequebec.py | 207
-rw-r--r--  youtube_dl/extractor/tfo.py | 57
-rw-r--r--  youtube_dl/extractor/thesun.py | 32
-rw-r--r--  youtube_dl/extractor/thisoldhouse.py | 44
-rw-r--r--  youtube_dl/extractor/toggle.py | 210
-rw-r--r--  youtube_dl/extractor/trunews.py | 75
-rw-r--r--  youtube_dl/extractor/tumblr.py | 214
-rw-r--r--  youtube_dl/extractor/tutv.py | 36
-rw-r--r--  youtube_dl/extractor/tv2.py | 145
-rw-r--r--  youtube_dl/extractor/tv4.py | 124
-rw-r--r--  youtube_dl/extractor/tv5mondeplus.py | 79
-rw-r--r--  youtube_dl/extractor/tva.py | 54
-rw-r--r--  youtube_dl/extractor/tvnow.py | 486
-rw-r--r--  youtube_dl/extractor/tvplay.py | 557
-rw-r--r--  youtube_dl/extractor/twentyfourvideo.py | 127
-rw-r--r--  youtube_dl/extractor/twitch.py | 731
-rw-r--r--  youtube_dl/extractor/twitter.py | 575
-rw-r--r--  youtube_dl/extractor/udemy.py | 481
-rw-r--r--  youtube_dl/extractor/ufctv.py | 73
-rw-r--r--  youtube_dl/extractor/uol.py | 159
-rw-r--r--  youtube_dl/extractor/vice.py | 337
-rw-r--r--  youtube_dl/extractor/videa.py | 106
-rw-r--r--  youtube_dl/extractor/videodetective.py | 30
-rw-r--r--  youtube_dl/extractor/videopremium.py | 46
-rw-r--r--  youtube_dl/extractor/vidzi.py | 68
-rw-r--r--  youtube_dl/extractor/viewlift.py | 302
-rw-r--r--  youtube_dl/extractor/viewster.py | 217
-rw-r--r--  youtube_dl/extractor/viki.py | 384
-rw-r--r--  youtube_dl/extractor/vimeo.py | 1131
-rw-r--r--  youtube_dl/extractor/vk.py | 644
-rw-r--r--  youtube_dl/extractor/vlive.py | 405
-rw-r--r--  youtube_dl/extractor/vodplatform.py | 37
-rw-r--r--  youtube_dl/extractor/voicerepublic.py | 100
-rw-r--r--  youtube_dl/extractor/vzaar.py | 95
-rw-r--r--  youtube_dl/extractor/wistia.py | 126
-rw-r--r--  youtube_dl/extractor/xfileshare.py | 213
-rw-r--r--  youtube_dl/extractor/xhamster.py | 381
-rw-r--r--  youtube_dl/extractor/xtube.py | 180
-rw-r--r--  youtube_dl/extractor/yahoo.py | 689
-rw-r--r--  youtube_dl/extractor/yandexmusic.py | 313
-rw-r--r--  youtube_dl/extractor/youjizz.py | 95
-rw-r--r--  youtube_dl/extractor/youporn.py | 192
-rw-r--r--  youtube_dl/extractor/yourporn.py | 57
-rw-r--r--  youtube_dl/extractor/youtube.py | 3327
-rw-r--r--  youtube_dl/extractor/zapiks.py | 110
-rw-r--r--  youtube_dl/extractor/zdf.py | 320
-rw-r--r--  youtube_dl/extractor/zype.py | 57
-rw-r--r--  youtube_dl/options.py | 916
-rw-r--r--  youtube_dl/postprocessor/embedthumbnail.py | 93
-rw-r--r--  youtube_dl/postprocessor/ffmpeg.py | 646
-rw-r--r--  youtube_dl/update.py | 187
-rw-r--r--  youtube_dl/utils.py | 5593
-rw-r--r--  youtube_dl/version.py | 3
-rwxr-xr-x  youtube_dlc/YoutubeDL.py | 2417
-rw-r--r--  youtube_dlc/__init__.py | 483
-rwxr-xr-x  youtube_dlc/__main__.py | 19
-rw-r--r--  youtube_dlc/aes.py (renamed from youtube_dl/aes.py) | 0
-rw-r--r--  youtube_dlc/cache.py | 96
-rw-r--r--  youtube_dlc/compat.py | 3050
-rw-r--r--  youtube_dlc/downloader/__init__.py | 63
-rw-r--r--  youtube_dlc/downloader/common.py | 391
-rw-r--r--  youtube_dlc/downloader/dash.py (renamed from youtube_dl/downloader/dash.py) | 0
-rw-r--r--  youtube_dlc/downloader/external.py (renamed from youtube_dl/downloader/external.py) | 0
-rw-r--r--  youtube_dlc/downloader/f4m.py (renamed from youtube_dl/downloader/f4m.py) | 0
-rw-r--r--  youtube_dlc/downloader/fragment.py | 269
-rw-r--r--  youtube_dlc/downloader/hls.py | 210
-rw-r--r--  youtube_dlc/downloader/http.py | 354
-rw-r--r--  youtube_dlc/downloader/ism.py (renamed from youtube_dl/downloader/ism.py) | 0
-rw-r--r--  youtube_dlc/downloader/rtmp.py (renamed from youtube_dl/downloader/rtmp.py) | 0
-rw-r--r--  youtube_dlc/downloader/rtsp.py (renamed from youtube_dl/downloader/rtsp.py) | 0
-rw-r--r--  youtube_dlc/downloader/youtube_live_chat.py | 94
-rw-r--r--  youtube_dlc/extractor/__init__.py (renamed from youtube_dl/extractor/__init__.py) | 0
-rw-r--r--  youtube_dlc/extractor/abc.py | 193
-rw-r--r--  youtube_dlc/extractor/abcnews.py (renamed from youtube_dl/extractor/abcnews.py) | 0
-rw-r--r--  youtube_dlc/extractor/abcotvs.py | 137
-rw-r--r--  youtube_dlc/extractor/academicearth.py (renamed from youtube_dl/extractor/academicearth.py) | 0
-rw-r--r--  youtube_dlc/extractor/acast.py (renamed from youtube_dl/extractor/acast.py) | 0
-rw-r--r--  youtube_dlc/extractor/adn.py (renamed from youtube_dl/extractor/adn.py) | 0
-rw-r--r--  youtube_dlc/extractor/adobeconnect.py (renamed from youtube_dl/extractor/adobeconnect.py) | 0
-rw-r--r--  youtube_dlc/extractor/adobepass.py (renamed from youtube_dl/extractor/adobepass.py) | 0
-rw-r--r--  youtube_dlc/extractor/adobetv.py | 288
-rw-r--r--  youtube_dlc/extractor/adultswim.py (renamed from youtube_dl/extractor/adultswim.py) | 0
-rw-r--r--  youtube_dlc/extractor/aenetworks.py (renamed from youtube_dl/extractor/aenetworks.py) | 0
-rw-r--r--  youtube_dlc/extractor/afreecatv.py (renamed from youtube_dl/extractor/afreecatv.py) | 0
-rw-r--r--  youtube_dlc/extractor/airmozilla.py (renamed from youtube_dl/extractor/airmozilla.py) | 0
-rw-r--r--  youtube_dlc/extractor/aliexpress.py (renamed from youtube_dl/extractor/aliexpress.py) | 0
-rw-r--r--  youtube_dlc/extractor/aljazeera.py (renamed from youtube_dl/extractor/aljazeera.py) | 0
-rw-r--r--  youtube_dlc/extractor/allocine.py (renamed from youtube_dl/extractor/allocine.py) | 0
-rw-r--r--  youtube_dlc/extractor/alphaporno.py (renamed from youtube_dl/extractor/alphaporno.py) | 0
-rw-r--r--  youtube_dlc/extractor/amcnetworks.py (renamed from youtube_dl/extractor/amcnetworks.py) | 0
-rw-r--r--  youtube_dlc/extractor/americastestkitchen.py | 82
-rw-r--r--  youtube_dlc/extractor/amp.py (renamed from youtube_dl/extractor/amp.py) | 0
-rw-r--r--  youtube_dlc/extractor/animeondemand.py (renamed from youtube_dl/extractor/animeondemand.py) | 0
-rw-r--r--  youtube_dlc/extractor/anvato.py (renamed from youtube_dl/extractor/anvato.py) | 0
-rw-r--r--  youtube_dlc/extractor/aol.py (renamed from youtube_dl/extractor/aol.py) | 0
-rw-r--r--  youtube_dlc/extractor/apa.py (renamed from youtube_dl/extractor/apa.py) | 0
-rw-r--r--  youtube_dlc/extractor/aparat.py (renamed from youtube_dl/extractor/aparat.py) | 0
-rw-r--r--  youtube_dlc/extractor/appleconnect.py (renamed from youtube_dl/extractor/appleconnect.py) | 0
-rw-r--r--  youtube_dlc/extractor/appletrailers.py | 283
-rw-r--r--  youtube_dlc/extractor/archiveorg.py (renamed from youtube_dl/extractor/archiveorg.py) | 0
-rw-r--r--  youtube_dlc/extractor/ard.py | 422
-rw-r--r--  youtube_dlc/extractor/arkena.py (renamed from youtube_dl/extractor/arkena.py) | 0
-rw-r--r--  youtube_dlc/extractor/arte.py (renamed from youtube_dl/extractor/arte.py) | 0
-rw-r--r--  youtube_dlc/extractor/asiancrush.py (renamed from youtube_dl/extractor/asiancrush.py) | 0
-rw-r--r--  youtube_dlc/extractor/atresplayer.py | 118
-rw-r--r--  youtube_dlc/extractor/atttechchannel.py (renamed from youtube_dl/extractor/atttechchannel.py) | 0
-rw-r--r--  youtube_dlc/extractor/atvat.py (renamed from youtube_dl/extractor/atvat.py) | 0
-rw-r--r--  youtube_dlc/extractor/audimedia.py (renamed from youtube_dl/extractor/audimedia.py) | 0
-rw-r--r--  youtube_dlc/extractor/audioboom.py | 73
-rw-r--r--  youtube_dlc/extractor/audiomack.py (renamed from youtube_dl/extractor/audiomack.py) | 0
-rw-r--r--  youtube_dlc/extractor/awaan.py (renamed from youtube_dl/extractor/awaan.py) | 0
-rw-r--r--  youtube_dlc/extractor/aws.py (renamed from youtube_dl/extractor/aws.py) | 0
-rw-r--r--  youtube_dlc/extractor/azmedien.py | 66
-rw-r--r--  youtube_dlc/extractor/baidu.py (renamed from youtube_dl/extractor/baidu.py) | 0
-rw-r--r--  youtube_dlc/extractor/bandcamp.py | 417
-rw-r--r--  youtube_dlc/extractor/bbc.py | 1359
-rw-r--r--  youtube_dlc/extractor/beampro.py (renamed from youtube_dl/extractor/beampro.py) | 0
-rw-r--r--  youtube_dlc/extractor/beatport.py (renamed from youtube_dl/extractor/beatport.py) | 0
-rw-r--r--  youtube_dlc/extractor/beeg.py (renamed from youtube_dl/extractor/beeg.py) | 0
-rw-r--r--  youtube_dlc/extractor/behindkink.py (renamed from youtube_dl/extractor/behindkink.py) | 0
-rw-r--r--  youtube_dlc/extractor/bellmedia.py | 88
-rw-r--r--  youtube_dlc/extractor/bet.py (renamed from youtube_dl/extractor/bet.py) | 0
-rw-r--r--  youtube_dlc/extractor/bfi.py (renamed from youtube_dl/extractor/bfi.py) | 0
-rw-r--r--  youtube_dlc/extractor/bigflix.py (renamed from youtube_dl/extractor/bigflix.py) | 0
-rw-r--r--  youtube_dlc/extractor/bild.py (renamed from youtube_dl/extractor/bild.py) | 0
-rw-r--r--  youtube_dlc/extractor/bilibili.py | 450
-rw-r--r--  youtube_dlc/extractor/biobiochiletv.py (renamed from youtube_dl/extractor/biobiochiletv.py) | 0
-rw-r--r--  youtube_dlc/extractor/biqle.py | 105
-rw-r--r--  youtube_dlc/extractor/bitchute.py | 142
-rw-r--r--  youtube_dlc/extractor/bleacherreport.py (renamed from youtube_dl/extractor/bleacherreport.py) | 0
-rw-r--r--  youtube_dlc/extractor/blinkx.py (renamed from youtube_dl/extractor/blinkx.py) | 0
-rw-r--r--  youtube_dlc/extractor/bloomberg.py (renamed from youtube_dl/extractor/bloomberg.py) | 0
-rw-r--r--  youtube_dlc/extractor/bokecc.py (renamed from youtube_dl/extractor/bokecc.py) | 0
-rw-r--r--  youtube_dlc/extractor/bostonglobe.py (renamed from youtube_dl/extractor/bostonglobe.py) | 0
-rw-r--r--  youtube_dlc/extractor/bpb.py (renamed from youtube_dl/extractor/bpb.py) | 0
-rw-r--r--  youtube_dlc/extractor/br.py (renamed from youtube_dl/extractor/br.py) | 0
-rw-r--r--  youtube_dlc/extractor/bravotv.py (renamed from youtube_dl/extractor/bravotv.py) | 0
-rw-r--r--  youtube_dlc/extractor/breakcom.py (renamed from youtube_dl/extractor/breakcom.py) | 0
-rw-r--r--  youtube_dlc/extractor/brightcove.py | 677
-rw-r--r--  youtube_dlc/extractor/businessinsider.py | 48
-rw-r--r--  youtube_dlc/extractor/buzzfeed.py (renamed from youtube_dl/extractor/buzzfeed.py) | 0
-rw-r--r--  youtube_dlc/extractor/byutv.py (renamed from youtube_dl/extractor/byutv.py) | 0
-rw-r--r--  youtube_dlc/extractor/c56.py (renamed from youtube_dl/extractor/c56.py) | 0
-rw-r--r--  youtube_dlc/extractor/camdemy.py (renamed from youtube_dl/extractor/camdemy.py) | 0
-rw-r--r--  youtube_dlc/extractor/cammodels.py (renamed from youtube_dl/extractor/cammodels.py) | 0
-rw-r--r--  youtube_dlc/extractor/camtube.py (renamed from youtube_dl/extractor/camtube.py) | 0
-rw-r--r--  youtube_dlc/extractor/camwithher.py (renamed from youtube_dl/extractor/camwithher.py) | 0
-rw-r--r--  youtube_dlc/extractor/canalc2.py (renamed from youtube_dl/extractor/canalc2.py) | 0
-rw-r--r--  youtube_dlc/extractor/canalplus.py (renamed from youtube_dl/extractor/canalplus.py) | 0
-rw-r--r--  youtube_dlc/extractor/canvas.py | 368
-rw-r--r--  youtube_dlc/extractor/carambatv.py (renamed from youtube_dl/extractor/carambatv.py) | 0
-rw-r--r--  youtube_dlc/extractor/cartoonnetwork.py (renamed from youtube_dl/extractor/cartoonnetwork.py) | 0
-rw-r--r--  youtube_dlc/extractor/cbc.py | 497
-rw-r--r--  youtube_dlc/extractor/cbs.py (renamed from youtube_dl/extractor/cbs.py) | 0
-rw-r--r--  youtube_dlc/extractor/cbsinteractive.py (renamed from youtube_dl/extractor/cbsinteractive.py) | 0
-rw-r--r--  youtube_dlc/extractor/cbslocal.py (renamed from youtube_dl/extractor/cbslocal.py) | 0
-rw-r--r--  youtube_dlc/extractor/cbsnews.py (renamed from youtube_dl/extractor/cbsnews.py) | 0
-rw-r--r--  youtube_dlc/extractor/cbssports.py (renamed from youtube_dl/extractor/cbssports.py) | 0
-rw-r--r--  youtube_dlc/extractor/ccc.py (renamed from youtube_dl/extractor/ccc.py) | 0
-rw-r--r--  youtube_dlc/extractor/ccma.py (renamed from youtube_dl/extractor/ccma.py) | 0
-rw-r--r--  youtube_dlc/extractor/cctv.py (renamed from youtube_dl/extractor/cctv.py) | 0
-rw-r--r--  youtube_dlc/extractor/cda.py (renamed from youtube_dl/extractor/cda.py) | 0
-rw-r--r--  youtube_dlc/extractor/ceskatelevize.py | 289
-rw-r--r--  youtube_dlc/extractor/channel9.py | 262
-rw-r--r--  youtube_dlc/extractor/charlierose.py (renamed from youtube_dl/extractor/charlierose.py) | 0
-rw-r--r--  youtube_dlc/extractor/chaturbate.py | 109
-rw-r--r--  youtube_dlc/extractor/chilloutzone.py (renamed from youtube_dl/extractor/chilloutzone.py) | 0
-rw-r--r--  youtube_dlc/extractor/chirbit.py (renamed from youtube_dl/extractor/chirbit.py) | 0
-rw-r--r--  youtube_dlc/extractor/cinchcast.py (renamed from youtube_dl/extractor/cinchcast.py) | 0
-rw-r--r--  youtube_dlc/extractor/cinemax.py (renamed from youtube_dl/extractor/cinemax.py) | 0
-rw-r--r--  youtube_dlc/extractor/ciscolive.py (renamed from youtube_dl/extractor/ciscolive.py) | 0
-rw-r--r--  youtube_dlc/extractor/cjsw.py (renamed from youtube_dl/extractor/cjsw.py) | 0
-rw-r--r--  youtube_dlc/extractor/cliphunter.py (renamed from youtube_dl/extractor/cliphunter.py) | 0
-rw-r--r--  youtube_dlc/extractor/clippit.py (renamed from youtube_dl/extractor/clippit.py) | 0
-rw-r--r--  youtube_dlc/extractor/cliprs.py (renamed from youtube_dl/extractor/cliprs.py) | 0
-rw-r--r--  youtube_dlc/extractor/clipsyndicate.py (renamed from youtube_dl/extractor/clipsyndicate.py) | 0
-rw-r--r--  youtube_dlc/extractor/closertotruth.py (renamed from youtube_dl/extractor/closertotruth.py) | 0
-rw-r--r--  youtube_dlc/extractor/cloudflarestream.py | 72
-rw-r--r--  youtube_dlc/extractor/cloudy.py (renamed from youtube_dl/extractor/cloudy.py) | 0
-rw-r--r--  youtube_dlc/extractor/clubic.py (renamed from youtube_dl/extractor/clubic.py) | 0
-rw-r--r--  youtube_dlc/extractor/clyp.py (renamed from youtube_dl/extractor/clyp.py) | 0
-rw-r--r--  youtube_dlc/extractor/cmt.py (renamed from youtube_dl/extractor/cmt.py) | 0
-rw-r--r--  youtube_dlc/extractor/cnbc.py (renamed from youtube_dl/extractor/cnbc.py) | 0
-rw-r--r--  youtube_dlc/extractor/cnn.py (renamed from youtube_dl/extractor/cnn.py) | 0
-rw-r--r--  youtube_dlc/extractor/comedycentral.py (renamed from youtube_dl/extractor/comedycentral.py) | 0
-rw-r--r--  youtube_dlc/extractor/common.py | 3013
-rw-r--r--  youtube_dlc/extractor/commonmistakes.py | 50
-rw-r--r--  youtube_dlc/extractor/commonprotocols.py (renamed from youtube_dl/extractor/commonprotocols.py) | 0
-rw-r--r--  youtube_dlc/extractor/condenast.py (renamed from youtube_dl/extractor/condenast.py) | 0
-rw-r--r--  youtube_dlc/extractor/contv.py | 118
-rw-r--r--  youtube_dlc/extractor/corus.py | 160
-rw-r--r--  youtube_dlc/extractor/coub.py (renamed from youtube_dl/extractor/coub.py) | 0
-rw-r--r--  youtube_dlc/extractor/cracked.py (renamed from youtube_dl/extractor/cracked.py) | 0
-rw-r--r--  youtube_dlc/extractor/crackle.py (renamed from youtube_dl/extractor/crackle.py) | 0
-rw-r--r--  youtube_dlc/extractor/crooksandliars.py (renamed from youtube_dl/extractor/crooksandliars.py) | 0
-rw-r--r--  youtube_dlc/extractor/crunchyroll.py | 686
-rw-r--r--  youtube_dlc/extractor/cspan.py (renamed from youtube_dl/extractor/cspan.py) | 0
-rw-r--r--  youtube_dlc/extractor/ctsnews.py (renamed from youtube_dl/extractor/ctsnews.py) | 0
-rw-r--r--  youtube_dlc/extractor/ctvnews.py (renamed from youtube_dl/extractor/ctvnews.py) | 0
-rw-r--r--  youtube_dlc/extractor/cultureunplugged.py (renamed from youtube_dl/extractor/cultureunplugged.py) | 0
-rw-r--r--  youtube_dlc/extractor/curiositystream.py (renamed from youtube_dl/extractor/curiositystream.py) | 0
-rw-r--r--  youtube_dlc/extractor/cwtv.py (renamed from youtube_dl/extractor/cwtv.py) | 0
-rw-r--r--  youtube_dlc/extractor/dailymail.py (renamed from youtube_dl/extractor/dailymail.py) | 0
-rw-r--r--  youtube_dlc/extractor/dailymotion.py | 393
-rw-r--r--  youtube_dlc/extractor/daum.py | 266
-rw-r--r--  youtube_dlc/extractor/dbtv.py (renamed from youtube_dl/extractor/dbtv.py) | 0
-rw-r--r--  youtube_dlc/extractor/dctp.py | 105
-rw-r--r--  youtube_dlc/extractor/deezer.py (renamed from youtube_dl/extractor/deezer.py) | 0
-rw-r--r--  youtube_dlc/extractor/defense.py (renamed from youtube_dl/extractor/defense.py) | 0
-rw-r--r--  youtube_dlc/extractor/democracynow.py (renamed from youtube_dl/extractor/democracynow.py) | 0
-rw-r--r--  youtube_dlc/extractor/dfb.py (renamed from youtube_dl/extractor/dfb.py) | 0
-rw-r--r--  youtube_dlc/extractor/dhm.py (renamed from youtube_dl/extractor/dhm.py) | 0
-rw-r--r--  youtube_dlc/extractor/digg.py (renamed from youtube_dl/extractor/digg.py) | 0
-rw-r--r--  youtube_dlc/extractor/digiteka.py (renamed from youtube_dl/extractor/digiteka.py) | 0
-rw-r--r--  youtube_dlc/extractor/discovery.py | 118
-rw-r--r--  youtube_dlc/extractor/discoverygo.py (renamed from youtube_dl/extractor/discoverygo.py) | 0
-rw-r--r--  youtube_dlc/extractor/discoverynetworks.py | 40
-rw-r--r--  youtube_dlc/extractor/discoveryvr.py (renamed from youtube_dl/extractor/discoveryvr.py) | 0
-rw-r--r--  youtube_dlc/extractor/disney.py (renamed from youtube_dl/extractor/disney.py) | 0
-rw-r--r--  youtube_dlc/extractor/dispeak.py (renamed from youtube_dl/extractor/dispeak.py) | 0
-rw-r--r--  youtube_dlc/extractor/dlive.py (renamed from youtube_dl/extractor/dlive.py) | 0
-rw-r--r--  youtube_dlc/extractor/doodstream.py | 71
-rw-r--r--  youtube_dlc/extractor/dotsub.py (renamed from youtube_dl/extractor/dotsub.py) | 0
-rw-r--r--  youtube_dlc/extractor/douyutv.py (renamed from youtube_dl/extractor/douyutv.py) | 0
-rw-r--r--  youtube_dlc/extractor/dplay.py | 247
-rw-r--r--  youtube_dlc/extractor/drbonanza.py (renamed from youtube_dl/extractor/drbonanza.py) | 0
-rw-r--r--  youtube_dlc/extractor/dropbox.py | 40
-rw-r--r--  youtube_dlc/extractor/drtuber.py (renamed from youtube_dl/extractor/drtuber.py) | 0
-rw-r--r--  youtube_dlc/extractor/drtv.py | 352
-rw-r--r--  youtube_dlc/extractor/dtube.py (renamed from youtube_dl/extractor/dtube.py) | 0
-rw-r--r--  youtube_dlc/extractor/dumpert.py | 80
-rw-r--r--  youtube_dlc/extractor/dvtv.py (renamed from youtube_dl/extractor/dvtv.py) | 0
-rw-r--r--  youtube_dlc/extractor/dw.py (renamed from youtube_dl/extractor/dw.py) | 0
-rw-r--r--  youtube_dlc/extractor/eagleplatform.py (renamed from youtube_dl/extractor/eagleplatform.py) | 0
-rw-r--r--  youtube_dlc/extractor/ebaumsworld.py (renamed from youtube_dl/extractor/ebaumsworld.py) | 0
-rw-r--r--  youtube_dlc/extractor/echomsk.py (renamed from youtube_dl/extractor/echomsk.py) | 0
-rw-r--r--  youtube_dlc/extractor/egghead.py (renamed from youtube_dl/extractor/egghead.py) | 0
-rw-r--r--  youtube_dlc/extractor/ehow.py (renamed from youtube_dl/extractor/ehow.py) | 0
-rw-r--r--  youtube_dlc/extractor/eighttracks.py | 164
-rw-r--r--  youtube_dlc/extractor/einthusan.py (renamed from youtube_dl/extractor/einthusan.py) | 0
-rw-r--r--  youtube_dlc/extractor/eitb.py (renamed from youtube_dl/extractor/eitb.py) | 0
-rw-r--r--  youtube_dlc/extractor/ellentube.py (renamed from youtube_dl/extractor/ellentube.py) | 0
-rw-r--r--  youtube_dlc/extractor/elpais.py (renamed from youtube_dl/extractor/elpais.py) | 0
-rw-r--r--  youtube_dlc/extractor/embedly.py (renamed from youtube_dl/extractor/embedly.py) | 0
-rw-r--r--  youtube_dlc/extractor/engadget.py (renamed from youtube_dl/extractor/engadget.py) | 0
-rw-r--r--  youtube_dlc/extractor/eporner.py | 129
-rw-r--r--  youtube_dlc/extractor/eroprofile.py (renamed from youtube_dl/extractor/eroprofile.py) | 0
-rw-r--r--  youtube_dlc/extractor/escapist.py (renamed from youtube_dl/extractor/escapist.py) | 0
-rw-r--r--  youtube_dlc/extractor/espn.py (renamed from youtube_dl/extractor/espn.py) | 0
-rw-r--r--  youtube_dlc/extractor/esri.py (renamed from youtube_dl/extractor/esri.py) | 0
-rw-r--r--  youtube_dlc/extractor/europa.py (renamed from youtube_dl/extractor/europa.py) | 0
-rw-r--r--  youtube_dlc/extractor/everyonesmixtape.py (renamed from youtube_dl/extractor/everyonesmixtape.py) | 0
-rw-r--r--  youtube_dlc/extractor/expotv.py (renamed from youtube_dl/extractor/expotv.py) | 0
-rw-r--r--  youtube_dlc/extractor/expressen.py (renamed from youtube_dl/extractor/expressen.py) | 0
-rw-r--r--  youtube_dlc/extractor/extractors.py | 1528
-rw-r--r--  youtube_dlc/extractor/extremetube.py (renamed from youtube_dl/extractor/extremetube.py) | 0
-rw-r--r--  youtube_dlc/extractor/eyedotv.py (renamed from youtube_dl/extractor/eyedotv.py) | 0
-rw-r--r--  youtube_dlc/extractor/facebook.py | 514
-rw-r--r--  youtube_dlc/extractor/faz.py (renamed from youtube_dl/extractor/faz.py) | 0
-rw-r--r--  youtube_dlc/extractor/fc2.py (renamed from youtube_dl/extractor/fc2.py) | 0
-rw-r--r--  youtube_dlc/extractor/fczenit.py (renamed from youtube_dl/extractor/fczenit.py) | 0
-rw-r--r--  youtube_dlc/extractor/filmon.py (renamed from youtube_dl/extractor/filmon.py) | 0
-rw-r--r--  youtube_dlc/extractor/filmweb.py (renamed from youtube_dl/extractor/filmweb.py) | 0
-rw-r--r--  youtube_dlc/extractor/firsttv.py (renamed from youtube_dl/extractor/firsttv.py) | 0
-rw-r--r--  youtube_dlc/extractor/fivemin.py (renamed from youtube_dl/extractor/fivemin.py) | 0
-rw-r--r--  youtube_dlc/extractor/fivetv.py (renamed from youtube_dl/extractor/fivetv.py) | 0
-rw-r--r--  youtube_dlc/extractor/flickr.py (renamed from youtube_dl/extractor/flickr.py) | 0
-rw-r--r--  youtube_dlc/extractor/folketinget.py (renamed from youtube_dl/extractor/folketinget.py) | 0
-rw-r--r--  youtube_dlc/extractor/footyroom.py (renamed from youtube_dl/extractor/footyroom.py) | 0
-rw-r--r--  youtube_dlc/extractor/formula1.py (renamed from youtube_dl/extractor/formula1.py) | 0
-rw-r--r--  youtube_dlc/extractor/fourtube.py (renamed from youtube_dl/extractor/fourtube.py) | 0
-rw-r--r--  youtube_dlc/extractor/fox.py (renamed from youtube_dl/extractor/fox.py) | 0
-rw-r--r--  youtube_dlc/extractor/fox9.py | 41
-rw-r--r--  youtube_dlc/extractor/foxgay.py (renamed from youtube_dl/extractor/foxgay.py) | 0
-rw-r--r--  youtube_dlc/extractor/foxnews.py (renamed from youtube_dl/extractor/foxnews.py) | 0
-rw-r--r--  youtube_dlc/extractor/foxsports.py (renamed from youtube_dl/extractor/foxsports.py) | 0
-rw-r--r--  youtube_dlc/extractor/franceculture.py | 69
-rw-r--r--  youtube_dlc/extractor/franceinter.py (renamed from youtube_dl/extractor/franceinter.py) | 0
-rw-r--r--  youtube_dlc/extractor/francetv.py | 518
-rw-r--r--  youtube_dlc/extractor/freesound.py (renamed from youtube_dl/extractor/freesound.py) | 0
-rw-r--r--  youtube_dlc/extractor/freespeech.py (renamed from youtube_dl/extractor/freespeech.py) | 0
-rw-r--r--  youtube_dlc/extractor/freshlive.py (renamed from youtube_dl/extractor/freshlive.py) | 0
-rw-r--r--  youtube_dlc/extractor/frontendmasters.py (renamed from youtube_dl/extractor/frontendmasters.py) | 0
-rw-r--r--  youtube_dlc/extractor/funimation.py (renamed from youtube_dl/extractor/funimation.py) | 0
-rw-r--r--  youtube_dlc/extractor/funk.py (renamed from youtube_dl/extractor/funk.py) | 0
-rw-r--r--  youtube_dlc/extractor/fusion.py (renamed from youtube_dl/extractor/fusion.py) | 0
-rw-r--r--  youtube_dlc/extractor/fxnetworks.py (renamed from youtube_dl/extractor/fxnetworks.py) | 0
-rw-r--r--  youtube_dlc/extractor/gaia.py (renamed from youtube_dl/extractor/gaia.py) | 0
-rw-r--r--  youtube_dlc/extractor/gameinformer.py (renamed from youtube_dl/extractor/gameinformer.py) | 0
-rw-r--r--  youtube_dlc/extractor/gamespot.py (renamed from youtube_dl/extractor/gamespot.py) | 0
-rw-r--r--  youtube_dlc/extractor/gamestar.py (renamed from youtube_dl/extractor/gamestar.py) | 0
-rw-r--r--  youtube_dlc/extractor/gaskrank.py (renamed from youtube_dl/extractor/gaskrank.py) | 0
-rw-r--r--  youtube_dlc/extractor/gazeta.py (renamed from youtube_dl/extractor/gazeta.py) | 0
-rw-r--r--  youtube_dlc/extractor/gdcvault.py (renamed from youtube_dl/extractor/gdcvault.py) | 0
-rw-r--r--  youtube_dlc/extractor/generic.py | 3459
-rw-r--r--  youtube_dlc/extractor/gfycat.py (renamed from youtube_dl/extractor/gfycat.py) | 0
-rw-r--r--  youtube_dlc/extractor/giantbomb.py | 90
-rw-r--r--  youtube_dlc/extractor/giga.py (renamed from youtube_dl/extractor/giga.py) | 0
-rw-r--r--  youtube_dlc/extractor/gigya.py (renamed from youtube_dl/extractor/gigya.py) | 0
-rw-r--r--  youtube_dlc/extractor/glide.py (renamed from youtube_dl/extractor/glide.py) | 0
-rw-r--r--  youtube_dlc/extractor/globo.py | 240
-rw-r--r--  youtube_dlc/extractor/go.py | 268
-rw-r--r--  youtube_dlc/extractor/godtube.py (renamed from youtube_dl/extractor/godtube.py) | 0
-rw-r--r--  youtube_dlc/extractor/golem.py (renamed from youtube_dl/extractor/golem.py) | 0
-rw-r--r--  youtube_dlc/extractor/googledrive.py (renamed from youtube_dl/extractor/googledrive.py) | 0
-rw-r--r--  youtube_dlc/extractor/googleplus.py (renamed from youtube_dl/extractor/googleplus.py) | 0
-rw-r--r--  youtube_dlc/extractor/googlesearch.py (renamed from youtube_dl/extractor/googlesearch.py) | 0
-rw-r--r--  youtube_dlc/extractor/goshgay.py (renamed from youtube_dl/extractor/goshgay.py) | 0
-rw-r--r--  youtube_dlc/extractor/gputechconf.py (renamed from youtube_dl/extractor/gputechconf.py) | 0
-rw-r--r--  youtube_dlc/extractor/groupon.py (renamed from youtube_dl/extractor/groupon.py) | 0
-rw-r--r--  youtube_dlc/extractor/hbo.py (renamed from youtube_dl/extractor/hbo.py) | 0
-rw-r--r--  youtube_dlc/extractor/hearthisat.py (renamed from youtube_dl/extractor/hearthisat.py) | 0
-rw-r--r--  youtube_dlc/extractor/heise.py (renamed from youtube_dl/extractor/heise.py) | 0
-rw-r--r--  youtube_dlc/extractor/hellporno.py | 76
-rw-r--r--  youtube_dlc/extractor/helsinki.py (renamed from youtube_dl/extractor/helsinki.py) | 0
-rw-r--r--  youtube_dlc/extractor/hentaistigma.py (renamed from youtube_dl/extractor/hentaistigma.py) | 0
-rw-r--r--  youtube_dlc/extractor/hgtv.py (renamed from youtube_dl/extractor/hgtv.py) | 0
-rw-r--r--  youtube_dlc/extractor/hidive.py (renamed from youtube_dl/extractor/hidive.py) | 0
-rw-r--r--  youtube_dlc/extractor/historicfilms.py (renamed from youtube_dl/extractor/historicfilms.py) | 0
-rw-r--r--  youtube_dlc/extractor/hitbox.py (renamed from youtube_dl/extractor/hitbox.py) | 0
-rw-r--r--  youtube_dlc/extractor/hitrecord.py (renamed from youtube_dl/extractor/hitrecord.py) | 0
-rw-r--r--  youtube_dlc/extractor/hketv.py (renamed from youtube_dl/extractor/hketv.py) | 0
-rw-r--r--  youtube_dlc/extractor/hornbunny.py (renamed from youtube_dl/extractor/hornbunny.py) | 0
-rw-r--r--  youtube_dlc/extractor/hotnewhiphop.py (renamed from youtube_dl/extractor/hotnewhiphop.py) | 0
-rw-r--r--  youtube_dlc/extractor/hotstar.py | 210
-rw-r--r--  youtube_dlc/extractor/howcast.py (renamed from youtube_dl/extractor/howcast.py) | 0
-rw-r--r--  youtube_dlc/extractor/howstuffworks.py (renamed from youtube_dl/extractor/howstuffworks.py) | 0
-rw-r--r--  youtube_dlc/extractor/hrfensehen.py | 102
-rw-r--r--  youtube_dlc/extractor/hrti.py (renamed from youtube_dl/extractor/hrti.py) | 0
-rw-r--r--  youtube_dlc/extractor/huajiao.py (renamed from youtube_dl/extractor/huajiao.py) | 0
-rw-r--r--  youtube_dlc/extractor/huffpost.py (renamed from youtube_dl/extractor/huffpost.py) | 0
-rw-r--r--  youtube_dlc/extractor/hungama.py (renamed from youtube_dl/extractor/hungama.py) | 0
-rw-r--r--  youtube_dlc/extractor/hypem.py (renamed from youtube_dl/extractor/hypem.py) | 0
-rw-r--r--  youtube_dlc/extractor/ign.py (renamed from youtube_dl/extractor/ign.py) | 0
-rw-r--r--  youtube_dlc/extractor/imdb.py | 147
-rw-r--r--  youtube_dlc/extractor/imggaming.py | 133
-rw-r--r--  youtube_dlc/extractor/imgur.py | 154
-rw-r--r--  youtube_dlc/extractor/ina.py (renamed from youtube_dl/extractor/ina.py) | 0
-rw-r--r--  youtube_dlc/extractor/inc.py (renamed from youtube_dl/extractor/inc.py) | 0
-rw-r--r--  youtube_dlc/extractor/indavideo.py | 128
-rw-r--r--  youtube_dlc/extractor/infoq.py (renamed from youtube_dl/extractor/infoq.py) | 0
-rw-r--r--  youtube_dlc/extractor/instagram.py (renamed from youtube_dl/extractor/instagram.py) | 0
-rw-r--r--  youtube_dlc/extractor/internazionale.py (renamed from youtube_dl/extractor/internazionale.py) | 0
-rw-r--r--  youtube_dlc/extractor/internetvideoarchive.py | 64
-rw-r--r--  youtube_dlc/extractor/iprima.py | 148
-rw-r--r--  youtube_dlc/extractor/iqiyi.py (renamed from youtube_dl/extractor/iqiyi.py) | 0
-rw-r--r--  youtube_dlc/extractor/ir90tv.py (renamed from youtube_dl/extractor/ir90tv.py) | 0
-rw-r--r--  youtube_dlc/extractor/itv.py (renamed from youtube_dl/extractor/itv.py) | 0
-rw-r--r--  youtube_dlc/extractor/ivi.py | 271
-rw-r--r--  youtube_dlc/extractor/ivideon.py (renamed from youtube_dl/extractor/ivideon.py) | 0
-rw-r--r--  youtube_dlc/extractor/iwara.py (renamed from youtube_dl/extractor/iwara.py) | 0
-rw-r--r--  youtube_dlc/extractor/izlesene.py (renamed from youtube_dl/extractor/izlesene.py) | 0
-rw-r--r--  youtube_dlc/extractor/jamendo.py | 187
-rw-r--r--  youtube_dlc/extractor/jeuxvideo.py (renamed from youtube_dl/extractor/jeuxvideo.py) | 0
-rw-r--r--  youtube_dlc/extractor/joj.py | 108
-rw-r--r--  youtube_dlc/extractor/jove.py (renamed from youtube_dl/extractor/jove.py) | 0
-rw-r--r--  youtube_dlc/extractor/jwplatform.py | 46
-rw-r--r--  youtube_dlc/extractor/kakao.py | 147
-rw-r--r--  youtube_dlc/extractor/kaltura.py | 377
-rw-r--r--  youtube_dlc/extractor/kanalplay.py (renamed from youtube_dl/extractor/kanalplay.py) | 0
-rw-r--r--  youtube_dlc/extractor/kankan.py (renamed from youtube_dl/extractor/kankan.py) | 0
-rw-r--r--  youtube_dlc/extractor/karaoketv.py (renamed from youtube_dl/extractor/karaoketv.py) | 0
-rw-r--r--  youtube_dlc/extractor/karrierevideos.py (renamed from youtube_dl/extractor/karrierevideos.py) | 0
-rw-r--r--  youtube_dlc/extractor/keezmovies.py (renamed from youtube_dl/extractor/keezmovies.py) | 0
-rw-r--r--  youtube_dlc/extractor/ketnet.py (renamed from youtube_dl/extractor/ketnet.py) | 0
-rw-r--r--  youtube_dlc/extractor/khanacademy.py (renamed from youtube_dl/extractor/khanacademy.py) | 0
-rw-r--r--  youtube_dlc/extractor/kickstarter.py (renamed from youtube_dl/extractor/kickstarter.py) | 0
-rw-r--r--  youtube_dlc/extractor/kinja.py | 221
-rw-r--r--  youtube_dlc/extractor/kinopoisk.py (renamed from youtube_dl/extractor/kinopoisk.py) | 0
-rw-r--r--  youtube_dlc/extractor/konserthusetplay.py (renamed from youtube_dl/extractor/konserthusetplay.py) | 0
-rw-r--r--  youtube_dlc/extractor/krasview.py (renamed from youtube_dl/extractor/krasview.py) | 0
-rw-r--r--  youtube_dlc/extractor/ku6.py (renamed from youtube_dl/extractor/ku6.py) | 0
-rw-r--r--  youtube_dlc/extractor/kusi.py (renamed from youtube_dl/extractor/kusi.py) | 0
-rw-r--r--  youtube_dlc/extractor/kuwo.py (renamed from youtube_dl/extractor/kuwo.py) | 0
-rw-r--r--  youtube_dlc/extractor/la7.py | 67
-rw-r--r--  youtube_dlc/extractor/laola1tv.py (renamed from youtube_dl/extractor/laola1tv.py) | 0
-rw-r--r--  youtube_dlc/extractor/lci.py (renamed from youtube_dl/extractor/lci.py) | 0
-rw-r--r--  youtube_dlc/extractor/lcp.py (renamed from youtube_dl/extractor/lcp.py) | 0
-rw-r--r--  youtube_dlc/extractor/lecture2go.py (renamed from youtube_dl/extractor/lecture2go.py) | 0
-rw-r--r--  youtube_dlc/extractor/lecturio.py | 243
-rw-r--r--  youtube_dlc/extractor/leeco.py (renamed from youtube_dl/extractor/leeco.py) | 0
-rw-r--r--  youtube_dlc/extractor/lego.py | 149
-rw-r--r--  youtube_dlc/extractor/lemonde.py (renamed from youtube_dl/extractor/lemonde.py) | 0
-rw-r--r--  youtube_dlc/extractor/lenta.py (renamed from youtube_dl/extractor/lenta.py) | 0
-rw-r--r--  youtube_dlc/extractor/libraryofcongress.py (renamed from youtube_dl/extractor/libraryofcongress.py) | 0
-rw-r--r--  youtube_dlc/extractor/libsyn.py (renamed from youtube_dl/extractor/libsyn.py) | 0
-rw-r--r--  youtube_dlc/extractor/lifenews.py (renamed from youtube_dl/extractor/lifenews.py) | 0
-rw-r--r--  youtube_dlc/extractor/limelight.py | 358
-rw-r--r--  youtube_dlc/extractor/line.py (renamed from youtube_dl/extractor/line.py) | 0
-rw-r--r--  youtube_dlc/extractor/linkedin.py (renamed from youtube_dl/extractor/linkedin.py) | 0
-rw-r--r--  youtube_dlc/extractor/linuxacademy.py | 173
-rw-r--r--  youtube_dlc/extractor/litv.py (renamed from youtube_dl/extractor/litv.py) | 0
-rw-r--r--  youtube_dlc/extractor/livejournal.py (renamed from youtube_dl/extractor/livejournal.py) | 0
-rw-r--r--  youtube_dlc/extractor/liveleak.py (renamed from youtube_dl/extractor/liveleak.py) | 0
-rw-r--r--  youtube_dlc/extractor/livestream.py (renamed from youtube_dl/extractor/livestream.py) | 0
-rw-r--r--  youtube_dlc/extractor/lnkgo.py | 88
-rw-r--r--  youtube_dlc/extractor/localnews8.py (renamed from youtube_dl/extractor/localnews8.py) | 0
-rw-r--r--  youtube_dlc/extractor/lovehomeporn.py (renamed from youtube_dl/extractor/lovehomeporn.py) | 0
-rw-r--r--  youtube_dlc/extractor/lrt.py (renamed from youtube_dl/extractor/lrt.py) | 0
-rw-r--r--  youtube_dlc/extractor/lynda.py (renamed from youtube_dl/extractor/lynda.py) | 0
-rw-r--r--  youtube_dlc/extractor/m6.py (renamed from youtube_dl/extractor/m6.py) | 0
-rw-r--r--  youtube_dlc/extractor/mailru.py | 329
-rw-r--r--  youtube_dlc/extractor/malltv.py | 56
-rw-r--r--  youtube_dlc/extractor/mangomolo.py | 58
-rw-r--r--  youtube_dlc/extractor/manyvids.py (renamed from youtube_dl/extractor/manyvids.py) | 0
-rw-r--r--  youtube_dlc/extractor/markiza.py (renamed from youtube_dl/extractor/markiza.py) | 0
-rw-r--r--  youtube_dlc/extractor/massengeschmacktv.py (renamed from youtube_dl/extractor/massengeschmacktv.py) | 0
-rw-r--r--  youtube_dlc/extractor/matchtv.py (renamed from youtube_dl/extractor/matchtv.py) | 0
-rw-r--r--  youtube_dlc/extractor/mdr.py (renamed from youtube_dl/extractor/mdr.py) | 0
-rw-r--r--  youtube_dlc/extractor/medialaan.py (renamed from youtube_dl/extractor/medialaan.py) | 0
-rw-r--r--  youtube_dlc/extractor/mediaset.py | 179
-rw-r--r--  youtube_dlc/extractor/mediasite.py | 366
-rw-r--r--  youtube_dlc/extractor/medici.py (renamed from youtube_dl/extractor/medici.py) | 0
-rw-r--r--  youtube_dlc/extractor/megaphone.py (renamed from youtube_dl/extractor/megaphone.py) | 0
-rw-r--r--  youtube_dlc/extractor/meipai.py (renamed from youtube_dl/extractor/meipai.py) | 0
-rw-r--r--  youtube_dlc/extractor/melonvod.py (renamed from youtube_dl/extractor/melonvod.py) | 0
-rw-r--r--  youtube_dlc/extractor/meta.py (renamed from youtube_dl/extractor/meta.py) | 0
-rw-r--r--  youtube_dlc/extractor/metacafe.py (renamed from youtube_dl/extractor/metacafe.py) | 0
-rw-r--r--  youtube_dlc/extractor/metacritic.py (renamed from youtube_dl/extractor/metacritic.py) | 0
-rw-r--r--  youtube_dlc/extractor/mgoon.py (renamed from youtube_dl/extractor/mgoon.py) | 0
-rw-r--r--  youtube_dlc/extractor/mgtv.py (renamed from youtube_dl/extractor/mgtv.py) | 0
-rw-r--r--  youtube_dlc/extractor/miaopai.py (renamed from youtube_dl/extractor/miaopai.py) | 0
-rw-r--r--  youtube_dlc/extractor/microsoftvirtualacademy.py (renamed from youtube_dl/extractor/microsoftvirtualacademy.py) | 0
-rw-r--r--  youtube_dlc/extractor/ministrygrid.py (renamed from youtube_dl/extractor/ministrygrid.py) | 0
-rw-r--r--  youtube_dlc/extractor/minoto.py (renamed from youtube_dl/extractor/minoto.py) | 0
-rw-r--r--  youtube_dlc/extractor/miomio.py (renamed from youtube_dl/extractor/miomio.py) | 0
-rw-r--r--  youtube_dlc/extractor/mit.py | 132
-rw-r--r--  youtube_dlc/extractor/mitele.py | 93
-rw-r--r--  youtube_dlc/extractor/mixcloud.py | 351
-rw-r--r--  youtube_dlc/extractor/mlb.py (renamed from youtube_dl/extractor/mlb.py) | 0
-rw-r--r--  youtube_dlc/extractor/mnet.py (renamed from youtube_dl/extractor/mnet.py) | 0
-rw-r--r--  youtube_dlc/extractor/moevideo.py (renamed from youtube_dl/extractor/moevideo.py) | 0
-rw-r--r--  youtube_dlc/extractor/mofosex.py | 79
-rw-r--r--  youtube_dlc/extractor/mojvideo.py (renamed from youtube_dl/extractor/mojvideo.py) | 0
-rw-r--r--  youtube_dlc/extractor/morningstar.py (renamed from youtube_dl/extractor/morningstar.py) | 0
-rw-r--r--  youtube_dlc/extractor/motherless.py | 207
-rw-r--r--  youtube_dlc/extractor/motorsport.py (renamed from youtube_dl/extractor/motorsport.py) | 0
-rw-r--r--  youtube_dlc/extractor/movieclips.py (renamed from youtube_dl/extractor/movieclips.py) | 0
-rw-r--r--  youtube_dlc/extractor/moviezine.py (renamed from youtube_dl/extractor/moviezine.py) | 0
-rw-r--r--  youtube_dlc/extractor/movingimage.py (renamed from youtube_dl/extractor/movingimage.py) | 0
-rw-r--r--  youtube_dlc/extractor/msn.py | 171
-rw-r--r--  youtube_dlc/extractor/mtv.py | 474
-rw-r--r--  youtube_dlc/extractor/muenchentv.py (renamed from youtube_dl/extractor/muenchentv.py) | 0
-rw-r--r--  youtube_dlc/extractor/mwave.py (renamed from youtube_dl/extractor/mwave.py) | 0
-rw-r--r--  youtube_dlc/extractor/mychannels.py (renamed from youtube_dl/extractor/mychannels.py) | 0
-rw-r--r--  youtube_dlc/extractor/myspace.py (renamed from youtube_dl/extractor/myspace.py) | 0
-rw-r--r--  youtube_dlc/extractor/myspass.py | 56
-rw-r--r--  youtube_dlc/extractor/myvi.py (renamed from youtube_dl/extractor/myvi.py) | 0
-rw-r--r--  youtube_dlc/extractor/myvidster.py (renamed from youtube_dl/extractor/myvidster.py) | 0
-rw-r--r--  youtube_dlc/extractor/nationalgeographic.py (renamed from youtube_dl/extractor/nationalgeographic.py) | 0
-rw-r--r--  youtube_dlc/extractor/naver.py | 166
-rw-r--r--  youtube_dlc/extractor/nba.py (renamed from youtube_dl/extractor/nba.py) | 0
-rw-r--r--  youtube_dlc/extractor/nbc.py | 541
-rw-r--r--  youtube_dlc/extractor/ndr.py | 402
-rw-r--r--  youtube_dlc/extractor/ndtv.py (renamed from youtube_dl/extractor/ndtv.py) | 0
-rw-r--r--  youtube_dlc/extractor/nerdcubed.py (renamed from youtube_dl/extractor/nerdcubed.py) | 0
-rw-r--r--  youtube_dlc/extractor/neteasemusic.py (renamed from youtube_dl/extractor/neteasemusic.py) | 0
-rw-r--r--  youtube_dlc/extractor/netzkino.py (renamed from youtube_dl/extractor/netzkino.py) | 0
-rw-r--r--  youtube_dlc/extractor/newgrounds.py (renamed from youtube_dl/extractor/newgrounds.py) | 0
-rw-r--r--  youtube_dlc/extractor/newstube.py (renamed from youtube_dl/extractor/newstube.py) | 0
-rw-r--r--  youtube_dlc/extractor/nextmedia.py (renamed from youtube_dl/extractor/nextmedia.py) | 0
-rw-r--r--  youtube_dlc/extractor/nexx.py | 453
-rw-r--r--  youtube_dlc/extractor/nfl.py (renamed from youtube_dl/extractor/nfl.py) | 0
-rw-r--r--  youtube_dlc/extractor/nhk.py | 93
-rw-r--r--  youtube_dlc/extractor/nhl.py (renamed from youtube_dl/extractor/nhl.py) | 0
-rw-r--r--  youtube_dlc/extractor/nick.py (renamed from youtube_dl/extractor/nick.py) | 0
-rw-r--r--  youtube_dlc/extractor/niconico.py (renamed from youtube_dl/extractor/niconico.py) | 0
-rw-r--r--  youtube_dlc/extractor/ninecninemedia.py (renamed from youtube_dl/extractor/ninecninemedia.py) | 0
-rw-r--r--  youtube_dlc/extractor/ninegag.py (renamed from youtube_dl/extractor/ninegag.py) | 0
-rw-r--r--  youtube_dlc/extractor/ninenow.py (renamed from youtube_dl/extractor/ninenow.py) | 0
-rw-r--r--  youtube_dlc/extractor/nintendo.py | 60
-rw-r--r--  youtube_dlc/extractor/njpwworld.py (renamed from youtube_dl/extractor/njpwworld.py) | 0
-rw-r--r--  youtube_dlc/extractor/nobelprize.py (renamed from youtube_dl/extractor/nobelprize.py) | 0
-rw-r--r--  youtube_dlc/extractor/noco.py (renamed from youtube_dl/extractor/noco.py) | 0
-rw-r--r--  youtube_dlc/extractor/nonktube.py (renamed from youtube_dl/extractor/nonktube.py) | 0
-rw-r--r--  youtube_dlc/extractor/noovo.py (renamed from youtube_dl/extractor/noovo.py) | 0
-rw-r--r--  youtube_dlc/extractor/normalboots.py (renamed from youtube_dl/extractor/normalboots.py) | 0
-rw-r--r--  youtube_dlc/extractor/nosvideo.py (renamed from youtube_dl/extractor/nosvideo.py) | 0
-rw-r--r--  youtube_dlc/extractor/nova.py | 305
-rw-r--r--  youtube_dlc/extractor/nowness.py | 147
-rw-r--r--  youtube_dlc/extractor/noz.py (renamed from youtube_dl/extractor/noz.py) | 0
-rw-r--r--  youtube_dlc/extractor/npo.py (renamed from youtube_dl/extractor/npo.py) | 0
-rw-r--r--  youtube_dlc/extractor/npr.py | 124
-rw-r--r--  youtube_dlc/extractor/nrk.py | 717
-rw-r--r--  youtube_dlc/extractor/nrl.py | 30
-rw-r--r--  youtube_dlc/extractor/ntvcojp.py (renamed from youtube_dl/extractor/ntvcojp.py) | 0
-rw-r--r--  youtube_dlc/extractor/ntvde.py (renamed from youtube_dl/extractor/ntvde.py) | 0
-rw-r--r--  youtube_dlc/extractor/ntvru.py | 131
-rw-r--r--  youtube_dlc/extractor/nuevo.py (renamed from youtube_dl/extractor/nuevo.py) | 0
-rw-r--r--  youtube_dlc/extractor/nuvid.py (renamed from youtube_dl/extractor/nuvid.py) | 0
-rw-r--r--  youtube_dlc/extractor/nytimes.py | 223
-rw-r--r--  youtube_dlc/extractor/nzz.py (renamed from youtube_dl/extractor/nzz.py) | 0
-rw-r--r--  youtube_dlc/extractor/odatv.py (renamed from youtube_dl/extractor/odatv.py) | 0
-rw-r--r--  youtube_dlc/extractor/odnoklassniki.py | 268
-rw-r--r--  youtube_dlc/extractor/oktoberfesttv.py (renamed from youtube_dl/extractor/oktoberfesttv.py) | 0
-rw-r--r--  youtube_dlc/extractor/once.py (renamed from youtube_dl/extractor/once.py) | 0
-rw-r--r--  youtube_dlc/extractor/ondemandkorea.py (renamed from youtube_dl/extractor/ondemandkorea.py) | 0
-rw-r--r--  youtube_dlc/extractor/onet.py | 268
-rw-r--r--  youtube_dlc/extractor/onionstudios.py | 53
-rw-r--r--  youtube_dlc/extractor/ooyala.py | 210
-rw-r--r--  youtube_dlc/extractor/openload.py | 238
-rw-r--r--  youtube_dlc/extractor/ora.py (renamed from youtube_dl/extractor/ora.py) | 0
-rw-r--r--  youtube_dlc/extractor/orf.py | 570
-rw-r--r--  youtube_dlc/extractor/outsidetv.py (renamed from youtube_dl/extractor/outsidetv.py) | 0
-rw-r--r--  youtube_dlc/extractor/packtpub.py (renamed from youtube_dl/extractor/packtpub.py) | 0
-rw-r--r--  youtube_dlc/extractor/pandoratv.py (renamed from youtube_dl/extractor/pandoratv.py) | 0
-rw-r--r--  youtube_dlc/extractor/parliamentliveuk.py (renamed from youtube_dl/extractor/parliamentliveuk.py) | 0
-rw-r--r--  youtube_dlc/extractor/patreon.py | 156
-rw-r--r--  youtube_dlc/extractor/pbs.py (renamed from youtube_dl/extractor/pbs.py) | 0
-rw-r--r--  youtube_dlc/extractor/pearvideo.py (renamed from youtube_dl/extractor/pearvideo.py) | 0
-rw-r--r--  youtube_dlc/extractor/peertube.py | 600
-rw-r--r--  youtube_dlc/extractor/people.py (renamed from youtube_dl/extractor/people.py) | 0
-rw-r--r--  youtube_dlc/extractor/performgroup.py (renamed from youtube_dl/extractor/performgroup.py) | 0
-rw-r--r--  youtube_dlc/extractor/periscope.py | 189
-rw-r--r--  youtube_dlc/extractor/philharmoniedeparis.py (renamed from youtube_dl/extractor/philharmoniedeparis.py) | 0
-rw-r--r--  youtube_dlc/extractor/phoenix.py | 52
-rw-r--r--  youtube_dlc/extractor/photobucket.py (renamed from youtube_dl/extractor/photobucket.py) | 0
-rw-r--r--  youtube_dlc/extractor/picarto.py (renamed from youtube_dl/extractor/picarto.py) | 0
-rw-r--r--  youtube_dlc/extractor/piksel.py (renamed from youtube_dl/extractor/piksel.py) | 0
-rw-r--r--  youtube_dlc/extractor/pinkbike.py (renamed from youtube_dl/extractor/pinkbike.py) | 0
-rw-r--r--  youtube_dlc/extractor/pladform.py (renamed from youtube_dl/extractor/pladform.py) | 0
-rw-r--r--  youtube_dlc/extractor/platzi.py | 224
-rw-r--r--  youtube_dlc/extractor/playfm.py (renamed from youtube_dl/extractor/playfm.py) | 0
-rw-r--r--  youtube_dlc/extractor/playplustv.py (renamed from youtube_dl/extractor/playplustv.py) | 0
-rw-r--r--  youtube_dlc/extractor/plays.py (renamed from youtube_dl/extractor/plays.py) | 0
-rw-r--r--  youtube_dlc/extractor/playtvak.py (renamed from youtube_dl/extractor/playtvak.py) | 0
-rw-r--r--  youtube_dlc/extractor/playvid.py (renamed from youtube_dl/extractor/playvid.py) | 0
-rw-r--r--  youtube_dlc/extractor/playwire.py (renamed from youtube_dl/extractor/playwire.py) | 0
-rw-r--r--  youtube_dlc/extractor/pluralsight.py (renamed from youtube_dl/extractor/pluralsight.py) | 0
-rw-r--r--  youtube_dlc/extractor/podomatic.py (renamed from youtube_dl/extractor/podomatic.py) | 0
-rw-r--r--  youtube_dlc/extractor/pokemon.py | 138
-rw-r--r--  youtube_dlc/extractor/polskieradio.py (renamed from youtube_dl/extractor/polskieradio.py) | 0
-rw-r--r--  youtube_dlc/extractor/popcorntimes.py | 99
-rw-r--r--  youtube_dlc/extractor/popcorntv.py (renamed from youtube_dl/extractor/popcorntv.py) | 0
-rw-r--r--  youtube_dlc/extractor/porn91.py (renamed from youtube_dl/extractor/porn91.py) | 0
-rw-r--r--  youtube_dlc/extractor/porncom.py (renamed from youtube_dl/extractor/porncom.py) | 0
-rw-r--r--  youtube_dlc/extractor/pornhd.py | 121
-rw-r--r--  youtube_dlc/extractor/pornhub.py | 611
-rw-r--r--  youtube_dlc/extractor/pornotube.py (renamed from youtube_dl/extractor/pornotube.py) | 0
-rw-r--r--  youtube_dlc/extractor/pornovoisines.py (renamed from youtube_dl/extractor/pornovoisines.py) | 0
-rw-r--r--  youtube_dlc/extractor/pornoxo.py (renamed from youtube_dl/extractor/pornoxo.py) | 0
-rw-r--r--  youtube_dlc/extractor/presstv.py (renamed from youtube_dl/extractor/presstv.py) | 0
-rw-r--r--  youtube_dlc/extractor/prosiebensat1.py | 500
-rw-r--r--  youtube_dlc/extractor/puhutv.py | 239
-rw-r--r--  youtube_dlc/extractor/puls4.py (renamed from youtube_dl/extractor/puls4.py) | 0
-rw-r--r--  youtube_dlc/extractor/pyvideo.py (renamed from youtube_dl/extractor/pyvideo.py) | 0
-rw-r--r--  youtube_dlc/extractor/qqmusic.py (renamed from youtube_dl/extractor/qqmusic.py) | 0
-rw-r--r--  youtube_dlc/extractor/r7.py (renamed from youtube_dl/extractor/r7.py) | 0
-rw-r--r--  youtube_dlc/extractor/radiobremen.py (renamed from youtube_dl/extractor/radiobremen.py) | 0
-rw-r--r--  youtube_dlc/extractor/radiocanada.py (renamed from youtube_dl/extractor/radiocanada.py) | 0
-rw-r--r--  youtube_dlc/extractor/radiode.py (renamed from youtube_dl/extractor/radiode.py) | 0
-rw-r--r--  youtube_dlc/extractor/radiofrance.py (renamed from youtube_dl/extractor/radiofrance.py) | 0
-rw-r--r--  youtube_dlc/extractor/radiojavan.py (renamed from youtube_dl/extractor/radiojavan.py) | 0
-rw-r--r--  youtube_dlc/extractor/rai.py (renamed from youtube_dl/extractor/rai.py) | 0
-rw-r--r--  youtube_dlc/extractor/raywenderlich.py (renamed from youtube_dl/extractor/raywenderlich.py) | 0
-rw-r--r--  youtube_dlc/extractor/rbmaradio.py (renamed from youtube_dl/extractor/rbmaradio.py) | 0
-rw-r--r--  youtube_dlc/extractor/rds.py (renamed from youtube_dl/extractor/rds.py) | 0
-rw-r--r--  youtube_dlc/extractor/redbulltv.py (renamed from youtube_dl/extractor/redbulltv.py) | 0
-rw-r--r--  youtube_dlc/extractor/reddit.py (renamed from youtube_dl/extractor/reddit.py) | 0
-rw-r--r--  youtube_dlc/extractor/redtube.py | 133
-rw-r--r--  youtube_dlc/extractor/regiotv.py (renamed from youtube_dl/extractor/regiotv.py) | 0
-rw-r--r--  youtube_dlc/extractor/rentv.py (renamed from youtube_dl/extractor/rentv.py) | 0
-rw-r--r--  youtube_dlc/extractor/restudy.py (renamed from youtube_dl/extractor/restudy.py) | 0
-rw-r--r--  youtube_dlc/extractor/reuters.py (renamed from youtube_dl/extractor/reuters.py) | 0
-rw-r--r--  youtube_dlc/extractor/reverbnation.py (renamed from youtube_dl/extractor/reverbnation.py) | 0
-rw-r--r--  youtube_dlc/extractor/rice.py (renamed from youtube_dl/extractor/rice.py) | 0
-rw-r--r--  youtube_dlc/extractor/rmcdecouverte.py (renamed from youtube_dl/extractor/rmcdecouverte.py) | 0
-rw-r--r--  youtube_dlc/extractor/ro220.py (renamed from youtube_dl/extractor/ro220.py) | 0
-rw-r--r--  youtube_dlc/extractor/rockstargames.py (renamed from youtube_dl/extractor/rockstargames.py) | 0
-rw-r--r--  youtube_dlc/extractor/roosterteeth.py | 137
-rw-r--r--  youtube_dlc/extractor/rottentomatoes.py (renamed from youtube_dl/extractor/rottentomatoes.py) | 0
-rw-r--r--  youtube_dlc/extractor/roxwel.py (renamed from youtube_dl/extractor/roxwel.py) | 0
-rw-r--r--  youtube_dlc/extractor/rozhlas.py (renamed from youtube_dl/extractor/rozhlas.py) | 0
-rw-r--r--  youtube_dlc/extractor/rtbf.py (renamed from youtube_dl/extractor/rtbf.py) | 0
-rw-r--r--  youtube_dlc/extractor/rte.py (renamed from youtube_dl/extractor/rte.py) | 0
-rw-r--r--  youtube_dlc/extractor/rtl2.py (renamed from youtube_dl/extractor/rtl2.py) | 0
-rw-r--r--  youtube_dlc/extractor/rtlnl.py (renamed from youtube_dl/extractor/rtlnl.py) | 0
-rw-r--r--  youtube_dlc/extractor/rtp.py (renamed from youtube_dl/extractor/rtp.py) | 0
-rw-r--r--  youtube_dlc/extractor/rts.py (renamed from youtube_dl/extractor/rts.py) | 0
-rw-r--r--  youtube_dlc/extractor/rtve.py (renamed from youtube_dl/extractor/rtve.py) | 0
-rw-r--r--  youtube_dlc/extractor/rtvnh.py (renamed from youtube_dl/extractor/rtvnh.py) | 0
-rw-r--r--  youtube_dlc/extractor/rtvs.py (renamed from youtube_dl/extractor/rtvs.py) | 0
-rw-r--r--  youtube_dlc/extractor/ruhd.py (renamed from youtube_dl/extractor/ruhd.py) | 0
-rw-r--r--  youtube_dlc/extractor/rutube.py (renamed from youtube_dl/extractor/rutube.py) | 0
-rw-r--r--  youtube_dlc/extractor/rutv.py (renamed from youtube_dl/extractor/rutv.py) | 0
-rw-r--r--  youtube_dlc/extractor/ruutu.py (renamed from youtube_dl/extractor/ruutu.py) | 0
-rw-r--r--  youtube_dlc/extractor/ruv.py (renamed from youtube_dl/extractor/ruv.py) | 0
-rw-r--r--  youtube_dlc/extractor/safari.py | 264
-rw-r--r--  youtube_dlc/extractor/sapo.py (renamed from youtube_dl/extractor/sapo.py) | 0
-rw-r--r--  youtube_dlc/extractor/savefrom.py (renamed from youtube_dl/extractor/savefrom.py) | 0
-rw-r--r--  youtube_dlc/extractor/sbs.py (renamed from youtube_dl/extractor/sbs.py) | 0
-rw-r--r--  youtube_dlc/extractor/screencast.py (renamed from youtube_dl/extractor/screencast.py) | 0
-rw-r--r--  youtube_dlc/extractor/screencastomatic.py (renamed from youtube_dl/extractor/screencastomatic.py) | 0
-rw-r--r--  youtube_dlc/extractor/scrippsnetworks.py | 152
-rw-r--r--  youtube_dlc/extractor/scte.py | 144
-rw-r--r--  youtube_dlc/extractor/seeker.py | 58
-rw-r--r--  youtube_dlc/extractor/senateisvp.py (renamed from youtube_dl/extractor/senateisvp.py) | 0
-rw-r--r--  youtube_dlc/extractor/sendtonews.py (renamed from youtube_dl/extractor/sendtonews.py) | 0
-rw-r--r--  youtube_dlc/extractor/servus.py | 69
-rw-r--r--  youtube_dlc/extractor/sevenplus.py (renamed from youtube_dl/extractor/sevenplus.py) | 0
-rw-r--r--  youtube_dlc/extractor/sexu.py (renamed from youtube_dl/extractor/sexu.py) | 0
-rw-r--r--  youtube_dlc/extractor/seznamzpravy.py (renamed from youtube_dl/extractor/seznamzpravy.py) | 0
-rw-r--r--  youtube_dlc/extractor/shahid.py (renamed from youtube_dl/extractor/shahid.py) | 0
-rw-r--r--  youtube_dlc/extractor/shared.py | 138
-rw-r--r--  youtube_dlc/extractor/showroomlive.py (renamed from youtube_dl/extractor/showroomlive.py) | 0
-rw-r--r--  youtube_dlc/extractor/sina.py (renamed from youtube_dl/extractor/sina.py) | 0
-rw-r--r--  youtube_dlc/extractor/sixplay.py (renamed from youtube_dl/extractor/sixplay.py) | 0
-rw-r--r--  youtube_dlc/extractor/sky.py (renamed from youtube_dl/extractor/sky.py) | 0
-rw-r--r--  youtube_dlc/extractor/skylinewebcams.py (renamed from youtube_dl/extractor/skylinewebcams.py) | 0
-rw-r--r--  youtube_dlc/extractor/skynewsarabia.py (renamed from youtube_dl/extractor/skynewsarabia.py) | 0
-rw-r--r--  youtube_dlc/extractor/slideshare.py (renamed from youtube_dl/extractor/slideshare.py) | 0
-rw-r--r--  youtube_dlc/extractor/slideslive.py | 61
-rw-r--r--  youtube_dlc/extractor/slutload.py (renamed from youtube_dl/extractor/slutload.py) | 0
-rw-r--r--  youtube_dlc/extractor/smotri.py (renamed from youtube_dl/extractor/smotri.py) | 0
-rw-r--r--  youtube_dlc/extractor/snotr.py (renamed from youtube_dl/extractor/snotr.py) | 0
-rw-r--r--  youtube_dlc/extractor/sohu.py | 202
-rw-r--r--  youtube_dlc/extractor/sonyliv.py (renamed from youtube_dl/extractor/sonyliv.py) | 0
-rw-r--r--  youtube_dlc/extractor/soundcloud.py | 899
-rw-r--r--  youtube_dlc/extractor/soundgasm.py (renamed from youtube_dl/extractor/soundgasm.py) | 0
-rw-r--r--  youtube_dlc/extractor/southpark.py (renamed from youtube_dl/extractor/southpark.py) | 0
-rw-r--r--  youtube_dlc/extractor/spankbang.py | 184
-rw-r--r--  youtube_dlc/extractor/spankwire.py | 182
-rw-r--r--  youtube_dlc/extractor/spiegel.py (renamed from youtube_dl/extractor/spiegel.py) | 0
-rw-r--r--  youtube_dlc/extractor/spiegeltv.py (renamed from youtube_dl/extractor/spiegeltv.py) | 0
-rw-r--r--  youtube_dlc/extractor/spike.py | 55
-rw-r--r--  youtube_dlc/extractor/sport5.py (renamed from youtube_dl/extractor/sport5.py) | 0
-rw-r--r--  youtube_dlc/extractor/sportbox.py (renamed from youtube_dl/extractor/sportbox.py) | 0
-rw-r--r--  youtube_dlc/extractor/sportdeutschland.py | 82
-rw-r--r--  youtube_dlc/extractor/springboardplatform.py (renamed from youtube_dl/extractor/springboardplatform.py) | 0
-rw-r--r--  youtube_dlc/extractor/sprout.py (renamed from youtube_dl/extractor/sprout.py) | 0
-rw-r--r--  youtube_dlc/extractor/srgssr.py (renamed from youtube_dl/extractor/srgssr.py) | 0
-rw-r--r--  youtube_dlc/extractor/srmediathek.py | 59
-rw-r--r--  youtube_dlc/extractor/stanfordoc.py (renamed from youtube_dl/extractor/stanfordoc.py) | 0
-rw-r--r--  youtube_dlc/extractor/steam.py (renamed from youtube_dl/extractor/steam.py) | 0
-rw-r--r--  youtube_dlc/extractor/stitcher.py (renamed from youtube_dl/extractor/stitcher.py) | 0
-rw-r--r--  youtube_dlc/extractor/storyfire.py | 255
-rw-r--r--  youtube_dlc/extractor/streamable.py (renamed from youtube_dl/extractor/streamable.py) | 0
-rw-r--r--  youtube_dlc/extractor/streamcloud.py | 78
-rw-r--r--  youtube_dlc/extractor/streamcz.py (renamed from youtube_dl/extractor/streamcz.py) | 0
-rw-r--r--  youtube_dlc/extractor/streetvoice.py (renamed from youtube_dl/extractor/streetvoice.py) | 0
-rw-r--r--  youtube_dlc/extractor/stretchinternet.py | 32
-rw-r--r--  youtube_dlc/extractor/stv.py | 67
-rw-r--r--  youtube_dlc/extractor/sunporno.py (renamed from youtube_dl/extractor/sunporno.py) | 0
-rw-r--r--  youtube_dlc/extractor/sverigesradio.py (renamed from youtube_dl/extractor/sverigesradio.py) | 0
-rw-r--r--  youtube_dlc/extractor/svt.py | 380
-rw-r--r--  youtube_dlc/extractor/swrmediathek.py (renamed from youtube_dl/extractor/swrmediathek.py) | 0
-rw-r--r--  youtube_dlc/extractor/syfy.py (renamed from youtube_dl/extractor/syfy.py) | 0
-rw-r--r--youtube_dlc/extractor/sztvhu.py (renamed from youtube_dl/extractor/sztvhu.py)0
-rw-r--r--youtube_dlc/extractor/tagesschau.py (renamed from youtube_dl/extractor/tagesschau.py)0
-rw-r--r--youtube_dlc/extractor/tass.py (renamed from youtube_dl/extractor/tass.py)0
-rw-r--r--youtube_dlc/extractor/tastytrade.py (renamed from youtube_dl/extractor/tastytrade.py)0
-rw-r--r--youtube_dlc/extractor/tbs.py (renamed from youtube_dl/extractor/tbs.py)0
-rw-r--r--youtube_dlc/extractor/tdslifeway.py (renamed from youtube_dl/extractor/tdslifeway.py)0
-rw-r--r--youtube_dlc/extractor/teachable.py298
-rw-r--r--youtube_dlc/extractor/teachertube.py (renamed from youtube_dl/extractor/teachertube.py)0
-rw-r--r--youtube_dlc/extractor/teachingchannel.py33
-rw-r--r--youtube_dlc/extractor/teamcoco.py205
-rw-r--r--youtube_dlc/extractor/teamtreehouse.py (renamed from youtube_dl/extractor/teamtreehouse.py)0
-rw-r--r--youtube_dlc/extractor/techtalks.py (renamed from youtube_dl/extractor/techtalks.py)0
-rw-r--r--youtube_dlc/extractor/ted.py (renamed from youtube_dl/extractor/ted.py)0
-rw-r--r--youtube_dlc/extractor/tele13.py (renamed from youtube_dl/extractor/tele13.py)0
-rw-r--r--youtube_dlc/extractor/tele5.py108
-rw-r--r--youtube_dlc/extractor/telebruxelles.py (renamed from youtube_dl/extractor/telebruxelles.py)0
-rw-r--r--youtube_dlc/extractor/telecinco.py188
-rw-r--r--youtube_dlc/extractor/telegraaf.py89
-rw-r--r--youtube_dlc/extractor/telemb.py (renamed from youtube_dl/extractor/telemb.py)0
-rw-r--r--youtube_dlc/extractor/telequebec.py205
-rw-r--r--youtube_dlc/extractor/teletask.py (renamed from youtube_dl/extractor/teletask.py)0
-rw-r--r--youtube_dlc/extractor/telewebion.py (renamed from youtube_dl/extractor/telewebion.py)0
-rw-r--r--youtube_dlc/extractor/tennistv.py (renamed from youtube_dl/extractor/tennistv.py)0
-rw-r--r--youtube_dlc/extractor/tenplay.py58
-rw-r--r--youtube_dlc/extractor/testurl.py (renamed from youtube_dl/extractor/testurl.py)0
-rw-r--r--youtube_dlc/extractor/tf1.py (renamed from youtube_dl/extractor/tf1.py)0
-rw-r--r--youtube_dlc/extractor/tfo.py55
-rw-r--r--youtube_dlc/extractor/theintercept.py (renamed from youtube_dl/extractor/theintercept.py)0
-rw-r--r--youtube_dlc/extractor/theplatform.py (renamed from youtube_dl/extractor/theplatform.py)0
-rw-r--r--youtube_dlc/extractor/thescene.py (renamed from youtube_dl/extractor/thescene.py)0
-rw-r--r--youtube_dlc/extractor/thestar.py (renamed from youtube_dl/extractor/thestar.py)0
-rw-r--r--youtube_dlc/extractor/thesun.py38
-rw-r--r--youtube_dlc/extractor/theweatherchannel.py (renamed from youtube_dl/extractor/theweatherchannel.py)0
-rw-r--r--youtube_dlc/extractor/thisamericanlife.py (renamed from youtube_dl/extractor/thisamericanlife.py)0
-rw-r--r--youtube_dlc/extractor/thisav.py (renamed from youtube_dl/extractor/thisav.py)0
-rw-r--r--youtube_dlc/extractor/thisoldhouse.py47
-rw-r--r--youtube_dlc/extractor/threeqsdn.py (renamed from youtube_dl/extractor/threeqsdn.py)0
-rw-r--r--youtube_dlc/extractor/tiktok.py (renamed from youtube_dl/extractor/tiktok.py)0
-rw-r--r--youtube_dlc/extractor/tinypic.py (renamed from youtube_dl/extractor/tinypic.py)0
-rw-r--r--youtube_dlc/extractor/tmz.py (renamed from youtube_dl/extractor/tmz.py)0
-rw-r--r--youtube_dlc/extractor/tnaflix.py (renamed from youtube_dl/extractor/tnaflix.py)0
-rw-r--r--youtube_dlc/extractor/toggle.py213
-rw-r--r--youtube_dlc/extractor/tonline.py (renamed from youtube_dl/extractor/tonline.py)0
-rw-r--r--youtube_dlc/extractor/toongoggles.py (renamed from youtube_dl/extractor/toongoggles.py)0
-rw-r--r--youtube_dlc/extractor/toutv.py (renamed from youtube_dl/extractor/toutv.py)0
-rw-r--r--youtube_dlc/extractor/toypics.py (renamed from youtube_dl/extractor/toypics.py)0
-rw-r--r--youtube_dlc/extractor/traileraddict.py (renamed from youtube_dl/extractor/traileraddict.py)0
-rw-r--r--youtube_dlc/extractor/trilulilu.py (renamed from youtube_dl/extractor/trilulilu.py)0
-rw-r--r--youtube_dlc/extractor/trunews.py34
-rw-r--r--youtube_dlc/extractor/trutv.py (renamed from youtube_dl/extractor/trutv.py)0
-rw-r--r--youtube_dlc/extractor/tube8.py (renamed from youtube_dl/extractor/tube8.py)0
-rw-r--r--youtube_dlc/extractor/tubitv.py (renamed from youtube_dl/extractor/tubitv.py)0
-rw-r--r--youtube_dlc/extractor/tudou.py (renamed from youtube_dl/extractor/tudou.py)0
-rw-r--r--youtube_dlc/extractor/tumblr.py213
-rw-r--r--youtube_dlc/extractor/tunein.py (renamed from youtube_dl/extractor/tunein.py)0
-rw-r--r--youtube_dlc/extractor/tunepk.py (renamed from youtube_dl/extractor/tunepk.py)0
-rw-r--r--youtube_dlc/extractor/turbo.py (renamed from youtube_dl/extractor/turbo.py)0
-rw-r--r--youtube_dlc/extractor/turner.py (renamed from youtube_dl/extractor/turner.py)0
-rw-r--r--youtube_dlc/extractor/tv2.py192
-rw-r--r--youtube_dlc/extractor/tv2dk.py154
-rw-r--r--youtube_dlc/extractor/tv2hu.py (renamed from youtube_dl/extractor/tv2hu.py)0
-rw-r--r--youtube_dlc/extractor/tv4.py124
-rw-r--r--youtube_dlc/extractor/tv5mondeplus.py117
-rw-r--r--youtube_dlc/extractor/tva.py57
-rw-r--r--youtube_dlc/extractor/tvanouvelles.py (renamed from youtube_dl/extractor/tvanouvelles.py)0
-rw-r--r--youtube_dlc/extractor/tvc.py (renamed from youtube_dl/extractor/tvc.py)0
-rw-r--r--youtube_dlc/extractor/tvigle.py (renamed from youtube_dl/extractor/tvigle.py)0
-rw-r--r--youtube_dlc/extractor/tvland.py (renamed from youtube_dl/extractor/tvland.py)0
-rw-r--r--youtube_dlc/extractor/tvn24.py (renamed from youtube_dl/extractor/tvn24.py)0
-rw-r--r--youtube_dlc/extractor/tvnet.py (renamed from youtube_dl/extractor/tvnet.py)0
-rw-r--r--youtube_dlc/extractor/tvnoe.py (renamed from youtube_dl/extractor/tvnoe.py)0
-rw-r--r--youtube_dlc/extractor/tvnow.py644
-rw-r--r--youtube_dlc/extractor/tvp.py (renamed from youtube_dl/extractor/tvp.py)0
-rw-r--r--youtube_dlc/extractor/tvplay.py512
-rw-r--r--youtube_dlc/extractor/tvplayer.py (renamed from youtube_dl/extractor/tvplayer.py)0
-rw-r--r--youtube_dlc/extractor/tweakers.py (renamed from youtube_dl/extractor/tweakers.py)0
-rw-r--r--youtube_dlc/extractor/twentyfourvideo.py133
-rw-r--r--youtube_dlc/extractor/twentymin.py (renamed from youtube_dl/extractor/twentymin.py)0
-rw-r--r--youtube_dlc/extractor/twentythreevideo.py (renamed from youtube_dl/extractor/twentythreevideo.py)0
-rw-r--r--youtube_dlc/extractor/twitcasting.py (renamed from youtube_dl/extractor/twitcasting.py)0
-rw-r--r--youtube_dlc/extractor/twitch.py802
-rw-r--r--youtube_dlc/extractor/twitter.py610
-rw-r--r--youtube_dlc/extractor/udemy.py481
-rw-r--r--youtube_dlc/extractor/udn.py (renamed from youtube_dl/extractor/udn.py)0
-rw-r--r--youtube_dlc/extractor/ufctv.py16
-rw-r--r--youtube_dlc/extractor/uktvplay.py (renamed from youtube_dl/extractor/uktvplay.py)0
-rw-r--r--youtube_dlc/extractor/umg.py (renamed from youtube_dl/extractor/umg.py)0
-rw-r--r--youtube_dlc/extractor/unistra.py (renamed from youtube_dl/extractor/unistra.py)0
-rw-r--r--youtube_dlc/extractor/unity.py (renamed from youtube_dl/extractor/unity.py)0
-rw-r--r--youtube_dlc/extractor/uol.py144
-rw-r--r--youtube_dlc/extractor/uplynk.py (renamed from youtube_dl/extractor/uplynk.py)0
-rw-r--r--youtube_dlc/extractor/urort.py (renamed from youtube_dl/extractor/urort.py)0
-rw-r--r--youtube_dlc/extractor/urplay.py (renamed from youtube_dl/extractor/urplay.py)0
-rw-r--r--youtube_dlc/extractor/usanetwork.py (renamed from youtube_dl/extractor/usanetwork.py)0
-rw-r--r--youtube_dlc/extractor/usatoday.py (renamed from youtube_dl/extractor/usatoday.py)0
-rw-r--r--youtube_dlc/extractor/ustream.py (renamed from youtube_dl/extractor/ustream.py)0
-rw-r--r--youtube_dlc/extractor/ustudio.py (renamed from youtube_dl/extractor/ustudio.py)0
-rw-r--r--youtube_dlc/extractor/varzesh3.py (renamed from youtube_dl/extractor/varzesh3.py)0
-rw-r--r--youtube_dlc/extractor/vbox7.py (renamed from youtube_dl/extractor/vbox7.py)0
-rw-r--r--youtube_dlc/extractor/veehd.py (renamed from youtube_dl/extractor/veehd.py)0
-rw-r--r--youtube_dlc/extractor/veoh.py (renamed from youtube_dl/extractor/veoh.py)0
-rw-r--r--youtube_dlc/extractor/vesti.py (renamed from youtube_dl/extractor/vesti.py)0
-rw-r--r--youtube_dlc/extractor/vevo.py (renamed from youtube_dl/extractor/vevo.py)0
-rw-r--r--youtube_dlc/extractor/vgtv.py (renamed from youtube_dl/extractor/vgtv.py)0
-rw-r--r--youtube_dlc/extractor/vh1.py (renamed from youtube_dl/extractor/vh1.py)0
-rw-r--r--youtube_dlc/extractor/vice.py337
-rw-r--r--youtube_dlc/extractor/vidbit.py (renamed from youtube_dl/extractor/vidbit.py)0
-rw-r--r--youtube_dlc/extractor/viddler.py (renamed from youtube_dl/extractor/viddler.py)0
-rw-r--r--youtube_dlc/extractor/videa.py164
-rw-r--r--youtube_dlc/extractor/videodetective.py29
-rw-r--r--youtube_dlc/extractor/videofyme.py (renamed from youtube_dl/extractor/videofyme.py)0
-rw-r--r--youtube_dlc/extractor/videomore.py (renamed from youtube_dl/extractor/videomore.py)0
-rw-r--r--youtube_dlc/extractor/videopress.py (renamed from youtube_dl/extractor/videopress.py)0
-rw-r--r--youtube_dlc/extractor/vidio.py (renamed from youtube_dl/extractor/vidio.py)0
-rw-r--r--youtube_dlc/extractor/vidlii.py (renamed from youtube_dl/extractor/vidlii.py)0
-rw-r--r--youtube_dlc/extractor/vidme.py (renamed from youtube_dl/extractor/vidme.py)0
-rw-r--r--youtube_dlc/extractor/vidzi.py68
-rw-r--r--youtube_dlc/extractor/vier.py (renamed from youtube_dl/extractor/vier.py)0
-rw-r--r--youtube_dlc/extractor/viewlift.py250
-rw-r--r--youtube_dlc/extractor/viidea.py (renamed from youtube_dl/extractor/viidea.py)0
-rw-r--r--youtube_dlc/extractor/viki.py384
-rw-r--r--youtube_dlc/extractor/vimeo.py1128
-rw-r--r--youtube_dlc/extractor/vimple.py (renamed from youtube_dl/extractor/vimple.py)0
-rw-r--r--youtube_dlc/extractor/vine.py (renamed from youtube_dl/extractor/vine.py)0
-rw-r--r--youtube_dlc/extractor/viqeo.py (renamed from youtube_dl/extractor/viqeo.py)0
-rw-r--r--youtube_dlc/extractor/viu.py (renamed from youtube_dl/extractor/viu.py)0
-rw-r--r--youtube_dlc/extractor/vk.py678
-rw-r--r--youtube_dlc/extractor/vlive.py367
-rw-r--r--youtube_dlc/extractor/vodlocker.py (renamed from youtube_dl/extractor/vodlocker.py)0
-rw-r--r--youtube_dlc/extractor/vodpl.py (renamed from youtube_dl/extractor/vodpl.py)0
-rw-r--r--youtube_dlc/extractor/vodplatform.py40
-rw-r--r--youtube_dlc/extractor/voicerepublic.py62
-rw-r--r--youtube_dlc/extractor/voot.py (renamed from youtube_dl/extractor/voot.py)0
-rw-r--r--youtube_dlc/extractor/voxmedia.py (renamed from youtube_dl/extractor/voxmedia.py)0
-rw-r--r--youtube_dlc/extractor/vrak.py (renamed from youtube_dl/extractor/vrak.py)0
-rw-r--r--youtube_dlc/extractor/vrt.py (renamed from youtube_dl/extractor/vrt.py)0
-rw-r--r--youtube_dlc/extractor/vrv.py (renamed from youtube_dl/extractor/vrv.py)0
-rw-r--r--youtube_dlc/extractor/vshare.py (renamed from youtube_dl/extractor/vshare.py)0
-rw-r--r--youtube_dlc/extractor/vube.py (renamed from youtube_dl/extractor/vube.py)0
-rw-r--r--youtube_dlc/extractor/vuclip.py (renamed from youtube_dl/extractor/vuclip.py)0
-rw-r--r--youtube_dlc/extractor/vvvvid.py (renamed from youtube_dl/extractor/vvvvid.py)0
-rw-r--r--youtube_dlc/extractor/vyborymos.py (renamed from youtube_dl/extractor/vyborymos.py)0
-rw-r--r--youtube_dlc/extractor/vzaar.py112
-rw-r--r--youtube_dlc/extractor/wakanim.py (renamed from youtube_dl/extractor/wakanim.py)0
-rw-r--r--youtube_dlc/extractor/walla.py (renamed from youtube_dl/extractor/walla.py)0
-rw-r--r--youtube_dlc/extractor/washingtonpost.py (renamed from youtube_dl/extractor/washingtonpost.py)0
-rw-r--r--youtube_dlc/extractor/wat.py (renamed from youtube_dl/extractor/wat.py)0
-rw-r--r--youtube_dlc/extractor/watchbox.py (renamed from youtube_dl/extractor/watchbox.py)0
-rw-r--r--youtube_dlc/extractor/watchindianporn.py (renamed from youtube_dl/extractor/watchindianporn.py)0
-rw-r--r--youtube_dlc/extractor/wdr.py (renamed from youtube_dl/extractor/wdr.py)0
-rw-r--r--youtube_dlc/extractor/webcaster.py (renamed from youtube_dl/extractor/webcaster.py)0
-rw-r--r--youtube_dlc/extractor/webofstories.py (renamed from youtube_dl/extractor/webofstories.py)0
-rw-r--r--youtube_dlc/extractor/weibo.py (renamed from youtube_dl/extractor/weibo.py)0
-rw-r--r--youtube_dlc/extractor/weiqitv.py (renamed from youtube_dl/extractor/weiqitv.py)0
-rw-r--r--youtube_dlc/extractor/wistia.py162
-rw-r--r--youtube_dlc/extractor/worldstarhiphop.py (renamed from youtube_dl/extractor/worldstarhiphop.py)0
-rw-r--r--youtube_dlc/extractor/wsj.py (renamed from youtube_dl/extractor/wsj.py)0
-rw-r--r--youtube_dlc/extractor/wwe.py (renamed from youtube_dl/extractor/wwe.py)0
-rw-r--r--youtube_dlc/extractor/xbef.py (renamed from youtube_dl/extractor/xbef.py)0
-rw-r--r--youtube_dlc/extractor/xboxclips.py (renamed from youtube_dl/extractor/xboxclips.py)0
-rw-r--r--youtube_dlc/extractor/xfileshare.py193
-rw-r--r--youtube_dlc/extractor/xhamster.py393
-rw-r--r--youtube_dlc/extractor/xiami.py (renamed from youtube_dl/extractor/xiami.py)0
-rw-r--r--youtube_dlc/extractor/ximalaya.py (renamed from youtube_dl/extractor/ximalaya.py)0
-rw-r--r--youtube_dlc/extractor/xminus.py (renamed from youtube_dl/extractor/xminus.py)0
-rw-r--r--youtube_dlc/extractor/xnxx.py (renamed from youtube_dl/extractor/xnxx.py)0
-rw-r--r--youtube_dlc/extractor/xstream.py (renamed from youtube_dl/extractor/xstream.py)0
-rw-r--r--youtube_dlc/extractor/xtube.py200
-rw-r--r--youtube_dlc/extractor/xuite.py (renamed from youtube_dl/extractor/xuite.py)0
-rw-r--r--youtube_dlc/extractor/xvideos.py (renamed from youtube_dl/extractor/xvideos.py)0
-rw-r--r--youtube_dlc/extractor/xxxymovies.py (renamed from youtube_dl/extractor/xxxymovies.py)0
-rw-r--r--youtube_dlc/extractor/yahoo.py569
-rw-r--r--youtube_dlc/extractor/yandexdisk.py (renamed from youtube_dl/extractor/yandexdisk.py)0
-rw-r--r--youtube_dlc/extractor/yandexmusic.py313
-rw-r--r--youtube_dlc/extractor/yandexvideo.py (renamed from youtube_dl/extractor/yandexvideo.py)0
-rw-r--r--youtube_dlc/extractor/yapfiles.py (renamed from youtube_dl/extractor/yapfiles.py)0
-rw-r--r--youtube_dlc/extractor/yesjapan.py (renamed from youtube_dl/extractor/yesjapan.py)0
-rw-r--r--youtube_dlc/extractor/yinyuetai.py (renamed from youtube_dl/extractor/yinyuetai.py)0
-rw-r--r--youtube_dlc/extractor/ynet.py (renamed from youtube_dl/extractor/ynet.py)0
-rw-r--r--youtube_dlc/extractor/youjizz.py95
-rw-r--r--youtube_dlc/extractor/youku.py (renamed from youtube_dl/extractor/youku.py)0
-rw-r--r--youtube_dlc/extractor/younow.py (renamed from youtube_dl/extractor/younow.py)0
-rw-r--r--youtube_dlc/extractor/youporn.py203
-rw-r--r--youtube_dlc/extractor/yourporn.py67
-rw-r--r--youtube_dlc/extractor/yourupload.py (renamed from youtube_dl/extractor/yourupload.py)0
-rw-r--r--youtube_dlc/extractor/youtube.py3445
-rw-r--r--youtube_dlc/extractor/zapiks.py109
-rw-r--r--youtube_dlc/extractor/zaq1.py (renamed from youtube_dl/extractor/zaq1.py)0
-rw-r--r--youtube_dlc/extractor/zattoo.py (renamed from youtube_dl/extractor/zattoo.py)0
-rw-r--r--youtube_dlc/extractor/zdf.py332
-rw-r--r--youtube_dlc/extractor/zingmp3.py (renamed from youtube_dl/extractor/zingmp3.py)0
-rw-r--r--youtube_dlc/extractor/zype.py134
-rw-r--r--youtube_dlc/jsinterp.py (renamed from youtube_dl/jsinterp.py)0
-rw-r--r--youtube_dlc/options.py916
-rw-r--r--youtube_dlc/postprocessor/__init__.py (renamed from youtube_dl/postprocessor/__init__.py)0
-rw-r--r--youtube_dlc/postprocessor/common.py (renamed from youtube_dl/postprocessor/common.py)0
-rw-r--r--youtube_dlc/postprocessor/embedthumbnail.py115
-rw-r--r--youtube_dlc/postprocessor/execafterdownload.py (renamed from youtube_dl/postprocessor/execafterdownload.py)0
-rw-r--r--youtube_dlc/postprocessor/ffmpeg.py657
-rw-r--r--youtube_dlc/postprocessor/metadatafromtitle.py (renamed from youtube_dl/postprocessor/metadatafromtitle.py)0
-rw-r--r--youtube_dlc/postprocessor/xattrpp.py (renamed from youtube_dl/postprocessor/xattrpp.py)0
-rw-r--r--youtube_dlc/socks.py (renamed from youtube_dl/socks.py)0
-rw-r--r--youtube_dlc/swfinterp.py (renamed from youtube_dl/swfinterp.py)0
-rw-r--r--youtube_dlc/update.py190
-rw-r--r--youtube_dlc/utils.py5707
-rw-r--r--youtube_dlc/version.py3
1092 files changed, 68756 insertions, 68638 deletions
diff --git a/.github/ISSUE_TEMPLATE/1_broken_site.md b/.github/ISSUE_TEMPLATE/1_broken_site.md
index 2fea0120e..f2260db46 100644
--- a/.github/ISSUE_TEMPLATE/1_broken_site.md
+++ b/.github/ISSUE_TEMPLATE/1_broken_site.md
@@ -18,7 +18,7 @@ title: ''
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -26,7 +26,7 @@ Carefully read and work through this check list in order to prevent the most com
-->
- [ ] I'm reporting a broken site support
-- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar issues including closed ones
@@ -41,7 +41,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2019.09.28
+ [debug] youtube-dl version 2020.07.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/2_site_support_request.md b/.github/ISSUE_TEMPLATE/2_site_support_request.md
index 6116acc79..8bc05c4ba 100644
--- a/.github/ISSUE_TEMPLATE/2_site_support_request.md
+++ b/.github/ISSUE_TEMPLATE/2_site_support_request.md
@@ -19,7 +19,7 @@ labels: 'site-support-request'
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that site you are requesting is not dedicated to copyright infringement, see https://yt-dl.org/copyright-infringement. youtube-dl does not support such sites. In order for site support request to be accepted all provided example URLs should not violate any copyrights.
- Search the bugtracker for similar site support requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
-->
- [ ] I'm reporting a new site support request
-- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that none of provided URLs violate any copyrights
- [ ] I've searched the bugtracker for similar site support requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.md b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
index 79d1a7f3c..98348e0cd 100644
--- a/.github/ISSUE_TEMPLATE/3_site_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.md
@@ -18,13 +18,13 @@ title: ''
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar site feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->
- [ ] I'm reporting a site feature request
-- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
- [ ] I've searched the bugtracker for similar site feature requests including closed ones
diff --git a/.github/ISSUE_TEMPLATE/4_bug_report.md b/.github/ISSUE_TEMPLATE/4_bug_report.md
index 9bda3d440..86706f528 100644
--- a/.github/ISSUE_TEMPLATE/4_bug_report.md
+++ b/.github/ISSUE_TEMPLATE/4_bug_report.md
@@ -18,7 +18,7 @@ title: ''
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Make sure that all provided video/audio/playlist URLs (if any) are alive and playable in a browser.
- Make sure that all URLs and arguments with special characters are properly quoted or escaped as explained in http://yt-dl.org/escape.
- Search the bugtracker for similar issues: http://yt-dl.org/search-issues. DO NOT post duplicates.
@@ -27,7 +27,7 @@ Carefully read and work through this check list in order to prevent the most com
-->
- [ ] I'm reporting a broken site support issue
-- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
- [ ] I've checked that all provided URLs are alive and playable in a browser
- [ ] I've checked that all URLs and arguments with special characters are properly quoted or escaped
- [ ] I've searched the bugtracker for similar bug reports including closed ones
@@ -43,7 +43,7 @@ Add the `-v` flag to your command line you run youtube-dl with (`youtube-dl -v <
[debug] User config: []
[debug] Command-line args: [u'-v', u'http://www.youtube.com/watch?v=BaW_jenozKcj']
[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
- [debug] youtube-dl version 2019.09.28
+ [debug] youtube-dl version 2020.07.28
[debug] Python version 2.7.11 - Windows-2003Server-5.2.3790-SP2
[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
[debug] Proxy map: {}
diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.md b/.github/ISSUE_TEMPLATE/5_feature_request.md
index 581344917..52c2709f9 100644
--- a/.github/ISSUE_TEMPLATE/5_feature_request.md
+++ b/.github/ISSUE_TEMPLATE/5_feature_request.md
@@ -19,13 +19,13 @@ labels: 'request'
<!--
Carefully read and work through this check list in order to prevent the most common mistakes and misuse of youtube-dl:
-- First of, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2019.09.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
+- First off, make sure you are using the latest version of youtube-dl. Run `youtube-dl --version` and ensure your version is 2020.07.28. If it's not, see https://yt-dl.org/update on how to update. Issues with outdated version will be REJECTED.
- Search the bugtracker for similar feature requests: http://yt-dl.org/search-issues. DO NOT post duplicates.
- Finally, put x into all relevant boxes (like this [x])
-->
- [ ] I'm reporting a feature request
-- [ ] I've verified that I'm running youtube-dl version **2019.09.28**
+- [ ] I've verified that I'm running youtube-dl version **2020.07.28**
- [ ] I've searched the bugtracker for similar feature requests including closed ones
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
new file mode 100644
index 000000000..224a00230
--- /dev/null
+++ b/.github/workflows/python-publish.yml
@@ -0,0 +1,33 @@
+# This workflow will upload a Python Package using Twine when a release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+name: Upload Python Package
+
+on:
+ push:
+ branches:
+ - release
+
+jobs:
+ deploy:
+
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: '3.x'
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install setuptools wheel twine
+ - name: Build and publish
+ env:
+ TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+ TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+ run: |
+ rm -rf dist/*
+ python setup.py sdist bdist_wheel
+ twine upload dist/*
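The workflow above publishes whatever a plain sdist/wheel build produces whenever the release branch is pushed. As a rough sanity check before pushing, the same build can be exercised locally; a minimal sketch, with `twine check` added as an extra validation step that is not part of the workflow itself:

    $ python -m pip install --upgrade pip setuptools wheel twine
    $ rm -rf dist/*
    $ python setup.py sdist bdist_wheel
    $ twine check dist/*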
diff --git a/.gitignore b/.gitignore
index c4870a6ba..9d371d997 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,12 +11,20 @@ dist/
MANIFEST
README.txt
youtube-dl.1
+youtube-dlc.1
youtube-dl.bash-completion
+youtube-dlc.bash-completion
youtube-dl.fish
+youtube-dlc.fish
youtube_dl/extractor/lazy_extractors.py
+youtube_dlc/extractor/lazy_extractors.py
youtube-dl
+youtube-dlc
youtube-dl.exe
+youtube-dlc.exe
youtube-dl.tar.gz
+youtube-dlc.tar.gz
+youtube-dlc.spec
.coverage
cover/
updates_key.pem
@@ -41,6 +49,7 @@ updates_key.pem
test/local_parameters.json
.tox
youtube-dl.zsh
+youtube-dlc.zsh
# IntelliJ related files
.idea
diff --git a/.travis.yml b/.travis.yml
index 6d16c2955..fb499845e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,28 +12,27 @@ python:
dist: trusty
env:
- YTDL_TEST_SET=core
- - YTDL_TEST_SET=download
-matrix:
+jobs:
include:
- python: 3.7
dist: xenial
env: YTDL_TEST_SET=core
- - python: 3.7
- dist: xenial
- env: YTDL_TEST_SET=download
- - python: 3.8-dev
+ - python: 3.8
dist: xenial
env: YTDL_TEST_SET=core
- python: 3.8-dev
dist: xenial
- env: YTDL_TEST_SET=download
+ env: YTDL_TEST_SET=core
- env: JYTHON=true; YTDL_TEST_SET=core
- - env: JYTHON=true; YTDL_TEST_SET=download
+ - name: flake8
+ python: 3.8
+ dist: xenial
+ install: pip install flake8
+ script: flake8 .
fast_finish: true
allow_failures:
- env: YTDL_TEST_SET=download
- env: JYTHON=true; YTDL_TEST_SET=core
- - env: JYTHON=true; YTDL_TEST_SET=download
before_install:
- if [ "$JYTHON" == "true" ]; then ./devscripts/install_jython.sh; export PATH="$HOME/jython/bin:$PATH"; fi
script: ./devscripts/run_tests.sh
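Both surviving job types boil down to commands that are easy to reproduce outside CI. A minimal sketch of running the core test set and the new flake8 job locally, using the same driver script and environment variable the config references:

    $ YTDL_TEST_SET=core ./devscripts/run_tests.sh
    $ pip install flake8
    $ flake8 .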
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ac759ddc4..58ab3a4b8 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -153,7 +153,7 @@ After you have ensured this site is distributing its content legally, you can fo
5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries. The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in.
7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
+8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart):
$ flake8 youtube_dl/extractor/yourextractor.py
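Steps 6 and 8 together form the local check loop for a new extractor. Illustratively, for a hypothetical extractor named YourExtractor defined in yourextractor.py:

    $ python test/test_download.py TestDownload.test_YourExtractor
    $ flake8 youtube_dl/extractor/yourextractor.py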
diff --git a/ChangeLog b/ChangeLog
index 80681a9ae..bf515f784 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,591 @@
+version 2020.07.28
+
+Extractors
+* [youtube] Fix sigfunc name extraction (#26134, #26135, #26136, #26137)
+* [youtube] Improve description extraction (#25937, #25980)
+* [wistia] Restrict embed regular expression (#25969)
+* [youtube] Prevent excess HTTP 301 (#25786)
++ [youtube:playlists] Extend URL regular expression (#25810)
++ [bellmedia] Add support for cp24.com clip URLs (#25764)
+* [brightcove] Improve embed detection (#25674)
+
+
+version 2020.06.16.1
+
+Extractors
+* [youtube] Force old layout (#25682, #25683, #25680, #25686)
+* [youtube] Fix categories and improve tags extraction
+
+
+version 2020.06.16
+
+Extractors
+* [youtube] Fix uploader id and uploader URL extraction
+* [youtube] Improve view count extraction
+* [youtube] Fix upload date extraction (#25677)
+* [youtube] Fix thumbnails extraction (#25676)
+* [youtube] Fix playlist and feed extraction (#25675)
++ [facebook] Add support for single-video ID links
++ [youtube] Extract chapters from JSON (#24819)
++ [kaltura] Add support for multiple embeds on a webpage (#25523)
+
+
+version 2020.06.06
+
+Extractors
+* [tele5] Bypass geo restriction
++ [jwplatform] Add support for bypassing geo restriction
+* [tele5] Prefer jwplatform over nexx (#25533)
+* [twitch:stream] Expect 400 and 410 HTTP errors from API
+* [twitch:stream] Fix extraction (#25528)
+* [twitch] Fix thumbnails extraction (#25531)
++ [twitch] Pass v5 Accept HTTP header (#25531)
+* [brightcove] Fix subtitles extraction (#25540)
++ [malltv] Add support for sk.mall.tv (#25445)
+* [periscope] Fix untitled broadcasts (#25482)
+* [jwplatform] Improve embeds extraction (#25467)
+
+
+version 2020.05.29
+
+Core
+* [postprocessor/ffmpeg] Embed series metadata with --add-metadata
+* [utils] Fix file permissions in write_json_file (#12471, #25122)
+
+Extractors
+* [ard:beta] Extend URL regular expression (#25405)
++ [youtube] Add support for more invidious instances (#25417)
+* [giantbomb] Extend URL regular expression (#25222)
+* [ard] Improve URL regular expression (#25134, #25198)
+* [redtube] Improve formats extraction and extract m3u8 formats (#25311,
+ #25321)
+* [indavideo] Switch to HTTPS for API request (#25191)
+* [redtube] Improve title extraction (#25208)
+* [vimeo] Improve format extraction and sorting (#25285)
+* [soundcloud] Reduce API playlist page limit (#25274)
++ [youtube] Add support for yewtu.be (#25226)
+* [mailru] Fix extraction (#24530, #25239)
+* [bellator] Fix mgid extraction (#25195)
+
+
+version 2020.05.08
+
+Core
+* [downloader/http] Request last data block of exact remaining size
+* [downloader/http] Finish downloading once received data length matches
+ expected
+* [extractor/common] Use compat_cookiejar_Cookie for _set_cookie to always
+ ensure cookie name and value are bytestrings on python 2 (#23256, #24776)
++ [compat] Introduce compat_cookiejar_Cookie
+* [utils] Improve cookie files support
+ + Add support for UTF-8 in cookie files
+ * Skip malformed cookie file entries instead of crashing (invalid entry
+ length, invalid expires at)
+
+Extractors
+* [youtube] Improve signature cipher extraction (#25187, #25188)
+* [iprima] Improve extraction (#25138)
+* [uol] Fix extraction (#22007)
++ [orf] Add support for more radio stations (#24938, #24968)
+* [dailymotion] Fix typo
+- [puhutv] Remove no longer available HTTP formats (#25124)
+
+
+version 2020.05.03
+
+Core
++ [extractor/common] Extract multiple JSON-LD entries
+* [options] Clarify doc on --exec command (#19087, #24883)
+* [extractor/common] Skip malformed ISM manifest XMLs while extracting
+ ISM formats (#24667)
+
+Extractors
+* [crunchyroll] Fix and improve extraction (#25096, #25060)
+* [youtube] Improve player id extraction
+* [youtube] Use redirected video id if any (#25063)
+* [yahoo] Fix GYAO Player extraction and relax URL regular expression
+ (#24178, #24778)
+* [tvplay] Fix Viafree extraction (#15189, #24473, #24789)
+* [tenplay] Relax URL regular expression (#25001)
++ [prosiebensat1] Extract series metadata
+* [prosiebensat1] Improve extraction and remove 7tv.de support (#24948)
+- [prosiebensat1] Remove 7tv.de support (#24948)
+* [youtube] Fix DRM videos detection (#24736)
+* [thisoldhouse] Fix video id extraction (#24548, #24549)
++ [soundcloud] Extract AAC format (#19173, #24708)
+* [youtube] Skip broken multifeed videos (#24711)
+* [nova:embed] Fix extraction (#24700)
+* [motherless] Fix extraction (#24699)
+* [twitch:clips] Extend URL regular expression (#24290, #24642)
+* [tv4] Fix ISM formats extraction (#24667)
+* [tele5] Fix extraction (#24553)
++ [mofosex] Add support for generic embeds (#24633)
++ [youporn] Add support for generic embeds
++ [spankwire] Add support for generic embeds (#24633)
+* [spankwire] Fix extraction (#18924, #20648)
+
+
+version 2020.03.24
+
+Core
+- [utils] Revert support for cookie files with spaces used instead of tabs
+
+Extractors
+* [teachable] Update upskillcourses and gns3 domains
+* [generic] Look for teachable embeds before wistia
++ [teachable] Extract chapter metadata (#24421)
++ [bilibili] Add support for player.bilibili.com (#24402)
++ [bilibili] Add support for new URL schema with BV ids (#24439, #24442)
+* [limelight] Remove disabled API requests (#24255)
+* [soundcloud] Fix download URL extraction (#24394)
++ [cbc:watch] Add support for authentication (#19160)
+* [hellporno] Fix extraction (#24399)
+* [xtube] Fix formats extraction (#24348)
+* [ndr] Fix extraction (#24326)
+* [nhk] Update m3u8 URL and use native HLS downloader (#24329)
+- [nhk] Remove obsolete rtmp formats (#24329)
+* [nhk] Relax URL regular expression (#24329)
+- [vimeo] Revert fix showcase password protected video extraction (#24224)
+
+
+version 2020.03.08
+
+Core
++ [utils] Add support for cookie files with spaces used instead of tabs
+
+Extractors
++ [pornhub] Add support for pornhubpremium.com (#24288)
+- [youtube] Remove outdated code and unnecessary requests
+* [youtube] Improve extraction in 429 HTTP error conditions (#24283)
+* [nhk] Update API version (#24270)
+
+
+version 2020.03.06
+
+Extractors
+* [youtube] Fix age-gated videos support without login (#24248)
+* [vimeo] Fix showcase password protected video extraction (#24224)
+* [pornhub] Improve title extraction (#24184)
+* [peertube] Improve extraction (#23657)
++ [servus] Add support for new URL schema (#23475, #23583, #24142)
+* [vimeo] Fix subtitles URLs (#24209)
+
+
+version 2020.03.01
+
+Core
+* [YoutubeDL] Force redirect URL to unicode on python 2
+- [options] Remove duplicate short option -v for --version (#24162)
+
+Extractors
+* [xhamster] Fix extraction (#24205)
+* [franceculture] Fix extraction (#24204)
++ [telecinco] Add support for article opening videos
+* [telecinco] Fix extraction (#24195)
+* [xtube] Fix metadata extraction (#21073, #22455)
+* [youjizz] Fix extraction (#24181)
+- Remove no longer needed compat_str around geturl
+* [pornhd] Fix extraction (#24128)
++ [teachable] Add support for multiple videos per lecture (#24101)
++ [wistia] Add support for multiple generic embeds (#8347, #11385)
+* [imdb] Fix extraction (#23443)
+* [tv2dk:bornholm:play] Fix extraction (#24076)
+
+
+version 2020.02.16
+
+Core
+* [YoutubeDL] Fix playlist entry indexing with --playlist-items (#10591,
+ #10622)
+* [update] Fix updating via symlinks (#23991)
++ [compat] Introduce compat_realpath (#23991)
+
+Extractors
++ [npr] Add support for streams (#24042)
++ [24video] Add support for porn.24video.net (#23779, #23784)
+- [jpopsuki] Remove extractor (#23858)
+* [nova] Improve extraction (#23690)
+* [nova:embed] Improve (#23690)
+* [nova:embed] Fix extraction (#23672)
++ [abc:iview] Add support for 720p (#22907, #22921)
+* [nytimes] Improve format sorting (#24010)
++ [toggle] Add support for mewatch.sg (#23895, #23930)
+* [thisoldhouse] Fix extraction (#23951)
++ [popcorntimes] Add support for popcorntimes.tv (#23949)
+* [sportdeutschland] Update to new API
+* [twitch:stream] Lowercase channel id for stream request (#23917)
+* [tv5mondeplus] Fix extraction (#23907, #23911)
+* [tva] Relax URL regular expression (#23903)
+* [vimeo] Fix album extraction (#23864)
+* [viewlift] Improve extraction
+ * Fix extraction (#23851)
+ + Add support for authentication
+ + Add support for more domains
+* [svt] Fix series extraction (#22297)
+* [svt] Fix article extraction (#22897, #22919)
+* [soundcloud] Improve private playlist/set tracks extraction (#3707)
+
+
+version 2020.01.24
+
+Extractors
+* [youtube] Fix sigfunc name extraction (#23819)
+* [stretchinternet] Fix extraction (#4319)
+* [voicerepublic] Fix extraction
+* [azmedien] Fix extraction (#23783)
+* [businessinsider] Fix jwplatform id extraction (#22929, #22954)
++ [24video] Add support for 24video.vip (#23753)
+* [ivi:compilation] Fix entries extraction (#23770)
+* [ard] Improve extraction (#23761)
+ * Simplify extraction
+ + Extract age limit and series
+ * Bypass geo-restriction
++ [nbc] Add support for nbc multi network URLs (#23049)
+* [americastestkitchen] Fix extraction
+* [zype] Improve extraction
+ + Extract subtitles (#21258)
+ + Support URLs with alternative keys/tokens (#21258)
+ + Extract more metadata
+* [orf:tvthek] Improve geo restricted videos detection (#23741)
+* [soundcloud] Restore previews extraction (#23739)
+
+
+version 2020.01.15
+
+Extractors
+* [yourporn] Fix extraction (#21645, #22255, #23459)
++ [canvas] Add support for new API endpoint (#17680, #18629)
+* [ndr:base:embed] Improve thumbnails extraction (#23731)
++ [vodplatform] Add support for embed.kwikmotion.com domain
++ [twitter] Add support for promo_video_website cards (#23711)
+* [orf:radio] Clean description and improve extraction
+* [orf:fm4] Fix extraction (#23599)
+* [safari] Fix kaltura session extraction (#23679, #23670)
+* [lego] Fix extraction and extract subtitle (#23687)
+* [cloudflarestream] Improve extraction
+ + Add support for bytehighway.net domain
+ + Add support for signed URLs
+ + Extract thumbnail
+* [naver] Improve extraction
+ * Improve geo-restriction handling
+ + Extract automatic captions
+ + Extract uploader metadata
+ + Extract VLive HLS formats
+ * Improve metadata extraction
+- [pandatv] Remove extractor (#23630)
+* [dctp] Fix format extraction (#23656)
++ [scrippsnetworks] Add support for www.discovery.com videos
+* [discovery] Fix anonymous token extraction (#23650)
+* [nrktv:seriebase] Fix extraction (#23625, #23537)
+* [wistia] Improve format extraction and extract subtitles (#22590)
+* [vice] Improve extraction (#23631)
+* [redtube] Detect private videos (#23518)
+
+
+version 2020.01.01
+
+Extractors
+* [brightcove] Invalidate policy key cache on failing requests
+* [pornhub] Improve locked videos detection (#22449, #22780)
++ [pornhub] Add support for m3u8 formats
+* [pornhub] Fix extraction (#22749, #23082)
+* [brightcove] Update policy key on failing requests
+* [spankbang] Improve removed video detection (#23423)
+* [spankbang] Fix extraction (#23307, #23423, #23444)
+* [soundcloud] Automatically update client id on failing requests
+* [prosiebensat1] Improve geo restriction handling (#23571)
+* [brightcove] Cache brightcove player policy keys
+* [teachable] Fail with error message if no video URL found
+* [teachable] Improve locked lessons detection (#23528)
++ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981)
+* [mitele] Fix extraction (#21354, #23456)
+* [soundcloud] Update client id (#23516)
+* [mailru] Relax URL regular expressions (#23509)
+
+
+version 2019.12.25
+
+Core
+* [utils] Improve str_to_int
++ [downloader/hls] Add ability to override AES decryption key URL (#17521)
+
+Extractors
+* [mediaset] Fix formats parsing (#23508)
++ [tv2dk:bornholm:play] Add support for play.tv2bornholm.dk (#23291)
++ [slideslive] Add support for url and vimeo service names (#23414)
+* [slideslive] Fix extraction (#23413)
+* [twitch:clips] Fix extraction (#23375)
++ [soundcloud] Add support for token protected embeds (#18954)
+* [vk] Improve extraction
+ * Fix User Videos extraction (#23356)
+ * Extract all videos for lists with more than 1000 videos (#23356)
+ + Add support for video albums (#14327, #14492)
+- [kontrtube] Remove extractor
+- [videopremium] Remove extractor
+- [musicplayon] Remove extractor (#9225)
++ [ufctv] Add support for ufcfightpass.imgdge.com and
+ ufcfightpass.imggaming.com (#23343)
++ [twitch] Extract m3u8 formats frame rate (#23333)
++ [imggaming] Add support for playlists and extract subtitles
++ [ufcarabia] Add support for UFC Arabia (#23312)
+* [ufctv] Fix extraction
+* [yahoo] Fix gyao brightcove player id (#23303)
+* [vzaar] Override AES decryption key URL (#17521)
++ [vzaar] Add support for AES HLS manifests (#17521, #23299)
+* [nrl] Fix extraction
+* [teachingchannel] Fix extraction
+* [nintendo] Fix extraction and partially add support for Nintendo Direct
+ videos (#4592)
++ [ooyala] Add better fallback values for domain and streams variables
++ [youtube] Add support for youtubekids.com (#23272)
+* [tv2] Detect DRM protection
++ [tv2] Add support for katsomo.fi and mtv.fi (#10543)
+* [tv2] Fix tv2.no article extraction
+* [msn] Improve extraction
+ + Add support for YouTube and NBCSports embeds
+ + Add support for articles with multiple videos
+ * Improve AOL embed support
+ * Improve format extraction
+* [abcotvs] Relax URL regular expression and improve metadata extraction
+ (#18014)
+* [channel9] Reduce response size
+* [adobetv] Improve extraction
+ * Use OnDemandPagedList for list extractors
+ * Reduce show extraction requests
+ * Extract original video format and subtitles
+ + Add support for adobe tv embeds
+
+
+version 2019.11.28
+
+Core
++ [utils] Add generic caesar cipher and rot47
+* [utils] Handle rd-suffixed day parts in unified_strdate (#23199)
+
+Extractors
+* [vimeo] Improve extraction
+ * Fix review extraction
+ * Fix ondemand extraction
+ * Treat password protected player case as an expected error (#22896)
+ * Simplify channel based extractors code
+- [openload] Remove extractor (#11999)
+- [verystream] Remove extractor
+- [streamango] Remove extractor (#15406)
+* [dailymotion] Improve extraction
+ * Extract http formats included in m3u8 manifest
+ * Fix user extraction (#3553, #21415)
+ + Add support for User Authentication (#11491)
+ * Fix password protected videos extraction (#23176)
+ * Respect age limit option and family filter cookie value (#18437)
+ * Handle video url playlist query param
+ * Report allowed countries for geo-restricted videos
+* [corus] Improve extraction
+ + Add support for Series Plus, W Network, YTV, ABC Spark, disneychannel.com
+ and disneylachaine.ca (#20861)
+ + Add support for self hosted videos (#22075)
+ * Detect DRM protection (#14910, #9164)
+* [vivo] Fix extraction (#22328, #22279)
++ [bitchute] Extract upload date (#22990, #23193)
+* [soundcloud] Update client id (#23214)
+
+
+version 2019.11.22
+
+Core
++ [extractor/common] Clean jwplayer description HTML tags
++ [extractor/common] Add data, headers and query to all major extract formats
+ methods
+
+Extractors
+* [chaturbate] Fix extraction (#23010, #23012)
++ [ntvru] Add support for non relative file URLs (#23140)
+* [vk] Fix wall audio thumbnails extraction (#23135)
+* [ivi] Fix format extraction (#21991)
+- [comcarcoff] Remove extractor
++ [drtv] Add support for new URL schema (#23059)
++ [nexx] Add support for Multi Player JS Setup (#23052)
++ [teamcoco] Add support for new videos (#23054)
+* [soundcloud] Check if the soundtrack has downloads left (#23045)
+* [facebook] Fix posts video data extraction (#22473)
+- [addanime] Remove extractor
+- [minhateca] Remove extractor
+- [daisuki] Remove extractor
+* [seeker] Fix extraction
+- [revision3] Remove extractors
+* [twitch] Fix video comments URL (#18593, #15828)
+* [twitter] Improve extraction
+ + Add support for generic embeds (#22168)
+ * Always extract http formats for native videos (#14934)
+ + Add support for Twitter Broadcasts (#21369)
+ + Extract more metadata
+ * Improve VMap format extraction
+ * Unify extraction code for both twitter statuses and cards
++ [twitch] Add support for Clip embed URLs
+* [lnkgo] Fix extraction (#16834)
+* [mixcloud] Improve extraction
+ * Improve metadata extraction (#11721)
+ * Fix playlist extraction (#22378)
+ * Fix user mixes extraction (#15197, #17865)
++ [kinja] Add support for Kinja embeds (#5756, #11282, #22237, #22384)
+* [onionstudios] Fix extraction
++ [hotstar] Pass Referer header to format requests (#22836)
+* [dplay] Minimize response size
++ [patreon] Extract uploader_id and filesize
+* [patreon] Minimize response size
+* [roosterteeth] Fix login request (#16094, #22689)
+
+
+version 2019.11.05
+
+Extractors
++ [scte] Add support for learning.scte.org (#22975)
++ [msn] Add support for Vidible and AOL embeds (#22195, #22227)
+* [myspass] Fix video URL extraction and improve metadata extraction (#22448)
+* [jamendo] Improve extraction
+ * Fix album extraction (#18564)
+ * Improve metadata extraction (#18565, #21379)
+* [mediaset] Relax URL guid matching (#18352)
++ [mediaset] Extract unprotected M3U and MPD manifests (#17204)
+* [telegraaf] Fix extraction
++ [bellmedia] Add support for marilyn.ca videos (#22193)
+* [stv] Fix extraction (#22928)
+- [iconosquare] Remove extractor
+- [keek] Remove extractor
+- [gameone] Remove extractor (#21778)
+- [flipagram] Remove extractor
+- [bambuser] Remove extractor
+* [wistia] Reduce embed extraction false positives
++ [wistia] Add support for inline embeds (#22931)
+- [go90] Remove extractor
+* [kakao] Remove raw request
++ [kakao] Extract format total bitrate
+* [daum] Fix VOD and Clip extraction (#15015)
+* [kakao] Improve extraction
+ + Add support for embed URLs
+ + Add support for Kakao Legacy vid based embed URLs
+ * Only extract fields used for extraction
+ * Strip description and extract tags
+* [mixcloud] Fix cloudcast data extraction (#22821)
+* [yahoo] Improve extraction
+ + Add support for live streams (#3597, #3779, #22178)
+ * Bypass cookie consent page for european domains (#16948, #22576)
+ + Add generic support for embeds (#20332)
+* [tv2] Fix and improve extraction (#22787)
++ [tv2dk] Add support for TV2 DK sites
+* [onet] Improve extraction
+ + Add support for onet100.vod.pl
+ + Extract m3u8 formats
+ * Correct audio only format info
+* [fox9] Fix extraction
+
+
+version 2019.10.29
+
+Core
+* [utils] Update major IPv4 address blocks per country
+
+Extractors
++ [go] Add support for abc.com and freeform.com (#22823, #22864)
++ [mtv] Add support for mtvjapan.com
+* [mtv] Fix extraction for mtv.de (#22113)
+* [videodetective] Fix extraction
+* [internetvideoarchive] Fix extraction
+* [nbcnews] Fix extraction (#12569, #12576, #21703, #21923)
+- [hark] Remove extractor
+- [tutv] Remove extractor
+- [learnr] Remove extractor
+- [macgamestore] Remove extractor
+* [la7] Update Kaltura service URL (#22358)
+* [thesun] Fix extraction (#16966)
+- [makertv] Remove extractor
++ [tenplay] Add support for 10play.com.au (#21446)
+* [soundcloud] Improve extraction
+ * Improve format extraction (#22123)
+ + Extract uploader_id and uploader_url (#21916)
+ + Extract all known thumbnails (#19071, #20659)
+ * Fix extraction for private playlists (#20976)
+ + Add support for playlist embeds (#20976)
+ * Skip preview formats (#22806)
+* [dplay] Improve extraction
+ + Add support for dplay.fi, dplay.jp and es.dplay.com (#16969)
+ * Fix it.dplay.com extraction (#22826)
+ + Extract creator, tags and thumbnails
+ * Handle playback API call errors
++ [discoverynetworks] Add support for dplay.co.uk
+* [vk] Improve extraction
+ + Add support for Odnoklassniki embeds
+ + Extract more videos from user lists (#4470)
+ + Fix wall post audio extraction (#18332)
+ * Improve error detection (#22568)
++ [odnoklassniki] Add support for embeds
+* [puhutv] Improve extraction
+ * Fix subtitles extraction
+ * Transform HLS URLs to HTTP URLs
+ * Improve metadata extraction
+* [ceskatelevize] Skip DRM media
++ [facebook] Extract subtitles (#22777)
+* [globo] Handle alternative hash signing method
+
+
+version 2019.10.22
+
+Core
+* [utils] Improve subtitles_filename (#22753)
+
+Extractors
+* [facebook] Bypass download rate limits (#21018)
++ [contv] Add support for contv.com
+- [viewster] Remove extractor
+* [xfileshare] Improve extractor (#17032, #17906, #18237, #18239)
+ * Update the list of domains
+ + Add support for aa-encoded video data
+ * Improve jwplayer format extraction
+ + Add support for Clappr sources
+* [mangomolo] Fix video format extraction and add support for player URLs
+* [audioboom] Improve metadata extraction
+* [twitch] Update VOD URL matching (#22395, #22727)
+- [mit] Remove support for video.mit.edu (#22403)
+- [servingsys] Remove extractor (#22639)
+* [dumpert] Fix extraction (#22428, #22564)
+* [atresplayer] Fix extraction (#16277, #16716)
+
+
+version 2019.10.16
+
+Core
+* [extractor/common] Make _is_valid_url more relaxed
+
+Extractors
+* [vimeo] Improve album videos id extraction (#22599)
++ [globo] Extract subtitles (#22713)
+* [bokecc] Improve player params extraction (#22638)
+* [nexx] Handle result list (#22666)
+* [vimeo] Fix VHX embed extraction
+* [nbc] Switch to graphql API (#18581, #22693, #22701)
+- [vessel] Remove extractor
+- [promptfile] Remove extractor (#6239)
+* [kaltura] Fix service URL extraction (#22658)
+* [kaltura] Fix embed info strip (#22658)
+* [globo] Fix format extraction (#20319)
+* [redtube] Improve metadata extraction (#22492, #22615)
+* [pornhub:uservideos:upload] Fix extraction (#22619)
++ [telequebec:squat] Add support for squat.telequebec.tv (#18503)
+- [wimp] Remove extractor (#22088, #22091)
++ [gfycat] Extend URL regular expression (#22225)
++ [chaturbate] Extend URL regular expression (#22309)
+* [peertube] Update instances (#22414)
++ [telequebec] Add support for coucou.telequebec.tv (#22482)
++ [xvideos] Extend URL regular expression (#22471)
+- [youtube] Remove support for invidious.enkirton.net (#22543)
++ [openload] Add support for oload.monster (#22592)
+* [nrktv:seriebase] Fix extraction (#22596)
++ [youtube] Add support for yt.lelux.fi (#22597)
+* [orf:tvthek] Make manifest requests non fatal (#22578)
+* [teachable] Skip login when already logged in (#22572)
+* [viewlift] Improve extraction (#22545)
+* [nonktube] Fix extraction (#22544)
+
+
version 2019.09.28
Core
@@ -353,7 +941,7 @@ Extractors
version 2019.04.17
Extractors
-* [openload] Randomize User-Agent (closes #20688)
+* [openload] Randomize User-Agent (#20688)
+ [openload] Add support for oladblock domains (#20471)
* [adn] Fix subtitle extraction (#12724)
+ [aol] Add support for localized websites
@@ -918,7 +1506,7 @@ Extractors
+ [youtube] Extract channel meta fields (#9676, #12939)
* [porntube] Fix extraction (#17541)
* [asiancrush] Fix extraction (#15630)
-+ [twitch:clips] Extend URL regular expression (closes #17559)
++ [twitch:clips] Extend URL regular expression (#17559)
+ [vzaar] Add support for HLS
* [tube8] Fix metadata extraction (#17520)
* [eporner] Extract JSON-LD (#17519)
diff --git a/MANIFEST.in b/MANIFEST.in
index 4e43e99f3..d2cce9a1c 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,8 +2,8 @@ include README.md
include LICENSE
include AUTHORS
include ChangeLog
-include youtube-dl.bash-completion
-include youtube-dl.fish
-include youtube-dl.1
+include youtube-dlc.bash-completion
+include youtube-dlc.fish
+include youtube-dlc.1
recursive-include docs Makefile conf.py *.rst
recursive-include test *
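Whether the renamed completion and man page files actually land in the source distribution can be verified by building one and listing its contents; a sketch, assuming the artifacts have been generated first (e.g. via the pypi-files target) and noting that the exact tarball name depends on the version in youtube_dlc/version.py:

    $ python setup.py sdist
    $ tar -tzf dist/youtube_dlc-*.tar.gz | grep 'youtube-dlc'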
diff --git a/Makefile b/Makefile
index 3e17365b8..9588657c1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
-all: youtube-dl README.md CONTRIBUTING.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish supportedsites
+all: youtube-dlc README.md CONTRIBUTING.md README.txt youtube-dlc.1 youtube-dlc.bash-completion youtube-dlc.zsh youtube-dlc.fish supportedsites
clean:
- rm -rf youtube-dl.1.temp.md youtube-dl.1 youtube-dl.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dl.tar.gz youtube-dl.zsh youtube-dl.fish youtube_dl/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp youtube-dl youtube-dl.exe
+ rm -rf youtube-dlc.1.temp.md youtube-dlc.1 youtube-dlc.bash-completion README.txt MANIFEST build/ dist/ .coverage cover/ youtube-dlc.tar.gz youtube-dlc.zsh youtube-dlc.fish youtube_dlc/extractor/lazy_extractors.py *.dump *.part* *.ytdl *.info.json *.mp4 *.m4a *.flv *.mp3 *.avi *.mkv *.webm *.3gp *.wav *.ape *.swf *.jpg *.png CONTRIBUTING.md.tmp youtube-dlc youtube-dlc.exe
find . -name "*.pyc" -delete
find . -name "*.class" -delete
@@ -17,23 +17,23 @@ SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then ech
# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2
MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi)
-install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish
+install: youtube-dlc youtube-dlc.1 youtube-dlc.bash-completion youtube-dlc.zsh youtube-dlc.fish
install -d $(DESTDIR)$(BINDIR)
- install -m 755 youtube-dl $(DESTDIR)$(BINDIR)
+ install -m 755 youtube-dlc $(DESTDIR)$(BINDIR)
install -d $(DESTDIR)$(MANDIR)/man1
- install -m 644 youtube-dl.1 $(DESTDIR)$(MANDIR)/man1
+ install -m 644 youtube-dlc.1 $(DESTDIR)$(MANDIR)/man1
install -d $(DESTDIR)$(SYSCONFDIR)/bash_completion.d
- install -m 644 youtube-dl.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dl
+ install -m 644 youtube-dlc.bash-completion $(DESTDIR)$(SYSCONFDIR)/bash_completion.d/youtube-dlc
install -d $(DESTDIR)$(SHAREDIR)/zsh/site-functions
- install -m 644 youtube-dl.zsh $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_youtube-dl
+ install -m 644 youtube-dlc.zsh $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_youtube-dlc
install -d $(DESTDIR)$(SYSCONFDIR)/fish/completions
- install -m 644 youtube-dl.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dl.fish
+ install -m 644 youtube-dlc.fish $(DESTDIR)$(SYSCONFDIR)/fish/completions/youtube-dlc.fish
codetest:
flake8 .
test:
- #nosetests --with-coverage --cover-package=youtube_dl --cover-html --verbose --processes 4 test
+ #nosetests --with-coverage --cover-package=youtube_dlc --cover-html --verbose --processes 4 test
nosetests --verbose test
$(MAKE) codetest
@@ -51,34 +51,34 @@ offlinetest: codetest
--exclude test_youtube_lists.py \
--exclude test_youtube_signature.py
-tar: youtube-dl.tar.gz
+tar: youtube-dlc.tar.gz
.PHONY: all clean install test tar bash-completion pypi-files zsh-completion fish-completion ot offlinetest codetest supportedsites
-pypi-files: youtube-dl.bash-completion README.txt youtube-dl.1 youtube-dl.fish
+pypi-files: youtube-dlc.bash-completion README.txt youtube-dlc.1 youtube-dlc.fish
-youtube-dl: youtube_dl/*.py youtube_dl/*/*.py
+youtube-dlc: youtube_dlc/*.py youtube_dlc/*/*.py
mkdir -p zip
- for d in youtube_dl youtube_dl/downloader youtube_dl/extractor youtube_dl/postprocessor ; do \
+ for d in youtube_dlc youtube_dlc/downloader youtube_dlc/extractor youtube_dlc/postprocessor ; do \
mkdir -p zip/$$d ;\
cp -pPR $$d/*.py zip/$$d/ ;\
done
- touch -t 200001010101 zip/youtube_dl/*.py zip/youtube_dl/*/*.py
- mv zip/youtube_dl/__main__.py zip/
- cd zip ; zip -q ../youtube-dl youtube_dl/*.py youtube_dl/*/*.py __main__.py
+ touch -t 200001010101 zip/youtube_dlc/*.py zip/youtube_dlc/*/*.py
+ mv zip/youtube_dlc/__main__.py zip/
+ cd zip ; zip -q ../youtube-dlc youtube_dlc/*.py youtube_dlc/*/*.py __main__.py
rm -rf zip
- echo '#!$(PYTHON)' > youtube-dl
- cat youtube-dl.zip >> youtube-dl
- rm youtube-dl.zip
- chmod a+x youtube-dl
+ echo '#!$(PYTHON)' > youtube-dlc
+ cat youtube-dlc.zip >> youtube-dlc
+ rm youtube-dlc.zip
+ chmod a+x youtube-dlc
-README.md: youtube_dl/*.py youtube_dl/*/*.py
- COLUMNS=80 $(PYTHON) youtube_dl/__main__.py --help | $(PYTHON) devscripts/make_readme.py
+README.md: youtube_dlc/*.py youtube_dlc/*/*.py
+ COLUMNS=80 $(PYTHON) youtube_dlc/__main__.py --help | $(PYTHON) devscripts/make_readme.py
CONTRIBUTING.md: README.md
$(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md
-issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md youtube_dl/version.py
+issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE_tmpl/4_bug_report.md .github/ISSUE_TEMPLATE_tmpl/5_feature_request.md youtube_dlc/version.py
$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.md .github/ISSUE_TEMPLATE/1_broken_site.md
$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.md .github/ISSUE_TEMPLATE/2_site_support_request.md
$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md
@@ -91,34 +91,34 @@ supportedsites:
README.txt: README.md
pandoc -f $(MARKDOWN) -t plain README.md -o README.txt
-youtube-dl.1: README.md
- $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md
- pandoc -s -f $(MARKDOWN) -t man youtube-dl.1.temp.md -o youtube-dl.1
- rm -f youtube-dl.1.temp.md
+youtube-dlc.1: README.md
+ $(PYTHON) devscripts/prepare_manpage.py youtube-dlc.1.temp.md
+ pandoc -s -f $(MARKDOWN) -t man youtube-dlc.1.temp.md -o youtube-dlc.1
+ rm -f youtube-dlc.1.temp.md
-youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in
+youtube-dlc.bash-completion: youtube_dlc/*.py youtube_dlc/*/*.py devscripts/bash-completion.in
$(PYTHON) devscripts/bash-completion.py
-bash-completion: youtube-dl.bash-completion
+bash-completion: youtube-dlc.bash-completion
-youtube-dl.zsh: youtube_dl/*.py youtube_dl/*/*.py devscripts/zsh-completion.in
+youtube-dlc.zsh: youtube_dlc/*.py youtube_dlc/*/*.py devscripts/zsh-completion.in
$(PYTHON) devscripts/zsh-completion.py
-zsh-completion: youtube-dl.zsh
+zsh-completion: youtube-dlc.zsh
-youtube-dl.fish: youtube_dl/*.py youtube_dl/*/*.py devscripts/fish-completion.in
+youtube-dlc.fish: youtube_dlc/*.py youtube_dlc/*/*.py devscripts/fish-completion.in
$(PYTHON) devscripts/fish-completion.py
-fish-completion: youtube-dl.fish
+fish-completion: youtube-dlc.fish
-lazy-extractors: youtube_dl/extractor/lazy_extractors.py
+lazy-extractors: youtube_dlc/extractor/lazy_extractors.py
-_EXTRACTOR_FILES = $(shell find youtube_dl/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
-youtube_dl/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
+_EXTRACTOR_FILES = $(shell find youtube_dlc/extractor -iname '*.py' -and -not -iname 'lazy_extractors.py')
+youtube_dlc/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
$(PYTHON) devscripts/make_lazy_extractors.py $@
-youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish ChangeLog AUTHORS
- @tar -czf youtube-dl.tar.gz --transform "s|^|youtube-dl/|" --owner 0 --group 0 \
+youtube-dlc.tar.gz: youtube-dlc README.md README.txt youtube-dlc.1 youtube-dlc.bash-completion youtube-dlc.zsh youtube-dlc.fish ChangeLog AUTHORS
+ @tar -czf youtube-dlc.tar.gz --transform "s|^|youtube-dlc/|" --owner 0 --group 0 \
--exclude '*.DS_Store' \
--exclude '*.kate-swp' \
--exclude '*.pyc' \
@@ -128,8 +128,8 @@ youtube-dl.tar.gz: youtube-dl README.md README.txt youtube-dl.1 youtube-dl.bash-
--exclude '.git' \
--exclude 'docs/_build' \
-- \
- bin devscripts test youtube_dl docs \
+ bin devscripts test youtube_dlc docs \
ChangeLog AUTHORS LICENSE README.md README.txt \
- Makefile MANIFEST.in youtube-dl.1 youtube-dl.bash-completion \
- youtube-dl.zsh youtube-dl.fish setup.py setup.cfg \
- youtube-dl
+ Makefile MANIFEST.in youtube-dlc.1 youtube-dlc.bash-completion \
+ youtube-dlc.zsh youtube-dlc.fish setup.py setup.cfg \
+ youtube-dlc
diff --git a/README.md b/README.md
index c39b13616..6ca488016 100644
--- a/README.md
+++ b/README.md
@@ -1,54 +1,57 @@
-[![Build Status](https://travis-ci.org/ytdl-org/youtube-dl.svg?branch=master)](https://travis-ci.org/ytdl-org/youtube-dl)
+[![PyPi](https://img.shields.io/pypi/v/youtube-dlc.svg)](https://pypi.org/project/youtube-dlc)
+[![Build Status](https://travis-ci.com/blackjack4494/youtube-dlc.svg?branch=master)](https://travis-ci.com/blackjack4494/youtube-dlc)
+[![Downloads](https://pepy.tech/badge/youtube-dlc)](https://pepy.tech/project/youtube-dlc)
-youtube-dl - download videos from youtube.com or other video platforms
+youtube-dlc - download videos from youtube.com or other video platforms
- [INSTALLATION](#installation)
- [DESCRIPTION](#description)
- [OPTIONS](#options)
-- [CONFIGURATION](#configuration)
-- [OUTPUT TEMPLATE](#output-template)
-- [FORMAT SELECTION](#format-selection)
-- [VIDEO SELECTION](#video-selection)
-- [FAQ](#faq)
-- [DEVELOPER INSTRUCTIONS](#developer-instructions)
-- [EMBEDDING YOUTUBE-DL](#embedding-youtube-dl)
-- [BUGS](#bugs)
- [COPYRIGHT](#copyright)
# INSTALLATION
-To install it right away for all UNIX users (Linux, macOS, etc.), type:
+**All Platforms**
+The preferred way is to use pip.
+You may need to use `python3` instead of `python`:
- sudo curl -L https://yt-dl.org/downloads/latest/youtube-dl -o /usr/local/bin/youtube-dl
- sudo chmod a+rx /usr/local/bin/youtube-dl
+ python -m pip install --upgrade youtube-dlc
-If you do not have curl, you can alternatively use a recent wget:
+**UNIX** (Linux, macOS, etc.)
+Using wget:
- sudo wget https://yt-dl.org/downloads/latest/youtube-dl -O /usr/local/bin/youtube-dl
- sudo chmod a+rx /usr/local/bin/youtube-dl
+ sudo wget https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc -O /usr/local/bin/youtube-dlc
+ sudo chmod a+rx /usr/local/bin/youtube-dlc
-Windows users can [download an .exe file](https://yt-dl.org/latest/youtube-dl.exe) and place it in any location on their [PATH](https://en.wikipedia.org/wiki/PATH_%28variable%29) except for `%SYSTEMROOT%\System32` (e.g. **do not** put in `C:\Windows\System32`).
+Using curl:
-You can also use pip:
+ sudo curl -L https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc -o /usr/local/bin/youtube-dlc
+ sudo chmod a+rx /usr/local/bin/youtube-dlc
- sudo -H pip install --upgrade youtube-dl
-
-This command will update youtube-dl if you have already installed it. See the [pypi page](https://pypi.python.org/pypi/youtube_dl) for more information.
-macOS users can install youtube-dl with [Homebrew](https://brew.sh/):
+**Windows** users can download [youtube-dlc.exe](https://github.com/blackjack4494/youtube-dlc/releases/latest/download/youtube-dlc.exe) (**do not** put in `C:\Windows\System32`!).
+
+**Compile**
+To build the Windows executable yourself:
- brew install youtube-dl
+ python -m pip install --upgrade pyinstaller
+ pyinstaller.exe youtube_dlc\__main__.py --onefile --name youtube-dlc
+
+Or simply execute `make_win.bat` if pyinstaller is installed.
+A `youtube-dlc.exe` will be created in `/dist`.
-Or with [MacPorts](https://www.macports.org/):
+For Unix:
+You will need the required build tools:
+python, make (GNU), pandoc, zip, nosetests.
+Then simply run:
- sudo port install youtube-dl
+ make
-Alternatively, refer to the [developer instructions](#developer-instructions) for how to check out and work with the git repository. For further options, including PGP signatures, see the [youtube-dl Download Page](https://ytdl-org.github.io/youtube-dl/download.html).
# DESCRIPTION
-**youtube-dl** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
+**youtube-dlc** is a command-line program to download videos from YouTube.com and a few more sites. It requires the Python interpreter, version 2.6, 2.7, or 3.2+, and it is not platform specific. It should work on your Unix box, on Windows or on macOS. It is released to the public domain, which means you can modify it, redistribute it or use it however you like.
- youtube-dl [OPTIONS] URL [URL...]
+ youtube-dlc [OPTIONS] URL [URL...]
# OPTIONS
-h, --help Print this help text and exit
@@ -69,19 +72,19 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
extractor
--default-search PREFIX Use this prefix for unqualified URLs. For
example "gvsearch2:" downloads two videos
- from google videos for youtube-dl "large
+ from google videos for youtube-dlc "large
apple". Use the value "auto" to let
- youtube-dl guess ("auto_warning" to emit a
+ youtube-dlc guess ("auto_warning" to emit a
warning when guessing). "error" just throws
an error. The default value "fixup_error"
repairs broken URLs, but emits an error if
this is not possible instead of searching.
--ignore-config Do not read configuration files. When given
in the global configuration file
- /etc/youtube-dl.conf: Do not read the user
+ /etc/youtube-dlc.conf: Do not read the user
configuration in ~/.config/youtube-
- dl/config (%APPDATA%/youtube-dl/config.txt
- on Windows)
+ dlc/config (%APPDATA%/youtube-
+ dlc/config.txt on Windows)
--config-location PATH Location of the configuration file; either
the path to the config or its containing
directory.
@@ -238,7 +241,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
filenames
-w, --no-overwrites Do not overwrite files
-c, --continue Force resume of partially downloaded files.
- By default, youtube-dl will resume
+ By default, youtube-dlc will resume
downloads if possible.
--no-continue Do not resume partially downloaded files
(restart from beginning)
@@ -256,11 +259,11 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
option)
--cookies FILE File to read cookies from and dump cookie
jar in
- --cache-dir DIR Location in the filesystem where youtube-dl
- can store some downloaded information
+ --cache-dir DIR Location in the filesystem where youtube-
+ dlc can store some downloaded information
permanently. By default
- $XDG_CACHE_HOME/youtube-dl or
- ~/.cache/youtube-dl . At the moment, only
+ $XDG_CACHE_HOME/youtube-dlc or
+ ~/.cache/youtube-dlc . At the moment, only
YouTube player files (for videos with
obfuscated signatures) are cached, but that
may change.
@@ -306,8 +309,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
files in the current directory to debug
problems
--print-traffic Display sent and read HTTP traffic
- -C, --call-home Contact the youtube-dl server for debugging
- --no-call-home Do NOT contact the youtube-dl server for
+ -C, --call-home Contact the youtube-dlc server for
+ debugging
+ --no-call-home Do NOT contact the youtube-dlc server for
debugging
## Workarounds:
@@ -368,7 +372,7 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
## Authentication Options:
-u, --username USERNAME Login with this account ID
-p, --password PASSWORD Account password. If this option is left
- out, youtube-dl will ask interactively.
+ out, youtube-dlc will ask interactively.
-2, --twofactor TWOFACTOR Two-factor authentication code
-n, --netrc Use .netrc authentication data
--video-password PASSWORD Video password (vimeo, smotri, youku)
@@ -379,8 +383,8 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
a list of available MSOs
--ap-username USERNAME Multiple-system operator account login
--ap-password PASSWORD Multiple-system operator account password.
- If this option is left out, youtube-dl will
- ask interactively.
+ If this option is left out, youtube-dlc
+ will ask interactively.
--ap-list-mso List all supported multiple-system
operators
@@ -434,1011 +438,9 @@ Alternatively, refer to the [developer instructions](#developer-instructions) fo
either the path to the binary or its
containing directory.
--exec CMD Execute a command on the file after
- downloading, similar to find's -exec
- syntax. Example: --exec 'adb push {}
- /sdcard/Music/ && rm {}'
+ downloading and post-processing, similar to
+ find's -exec syntax. Example: --exec 'adb
+ push {} /sdcard/Music/ && rm {}'
--convert-subs FORMAT Convert the subtitles to other format
(currently supported: srt|ass|vtt|lrc)
-# CONFIGURATION
-
-You can configure youtube-dl by placing any supported command line option to a configuration file. On Linux and macOS, the system wide configuration file is located at `/etc/youtube-dl.conf` and the user wide configuration file at `~/.config/youtube-dl/config`. On Windows, the user wide configuration file locations are `%APPDATA%\youtube-dl\config.txt` or `C:\Users\<user name>\youtube-dl.conf`. Note that by default configuration file may not exist so you may need to create it yourself.
-
-For example, with the following configuration file youtube-dl will always extract the audio, not copy the mtime, use a proxy and save all videos under `Movies` directory in your home directory:
-```
-# Lines starting with # are comments
-
-# Always extract audio
--x
-
-# Do not copy the mtime
---no-mtime
-
-# Use this proxy
---proxy 127.0.0.1:3128
-
-# Save all videos under Movies directory in your home directory
--o ~/Movies/%(title)s.%(ext)s
-```
-
-Note that options in configuration file are just the same options aka switches used in regular command line calls thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`.
-
-You can use `--ignore-config` if you want to disable the configuration file for a particular youtube-dl run.
-
-You can also use `--config-location` if you want to use custom configuration file for a particular youtube-dl run.
-
-### Authentication with `.netrc` file
-
-You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every youtube-dl execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per extractor basis. For that you will need to create a `.netrc` file in your `$HOME` and restrict permissions to read/write by only you:
-```
-touch $HOME/.netrc
-chmod a-rwx,u+rw $HOME/.netrc
-```
-After that you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
-```
-machine <extractor> login <login> password <password>
-```
-For example:
-```
-machine youtube login myaccount@gmail.com password my_youtube_password
-machine twitch login my_twitch_account_name password my_twitch_password
-```
-To activate authentication with the `.netrc` file you should pass `--netrc` to youtube-dl or place it in the [configuration file](#configuration).
-
-On Windows you may also need to setup the `%HOME%` environment variable manually. For example:
-```
-set HOME=%USERPROFILE%
-```
-
-# OUTPUT TEMPLATE
-
-The `-o` option allows users to indicate a template for the output file names.
-
-**tl;dr:** [navigate me to examples](#output-template-examples).
-
-The basic usage is not to set any template arguments when downloading a single file, like in `youtube-dl -o funny_video.flv "https://some/video"`. However, it may contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. Allowed names along with sequence type are:
-
- - `id` (string): Video identifier
- - `title` (string): Video title
- - `url` (string): Video URL
- - `ext` (string): Video filename extension
- - `alt_title` (string): A secondary title of the video
- - `display_id` (string): An alternative identifier for the video
- - `uploader` (string): Full name of the video uploader
- - `license` (string): License name the video is licensed under
- - `creator` (string): The creator of the video
- - `release_date` (string): The date (YYYYMMDD) when the video was released
- - `timestamp` (numeric): UNIX timestamp of the moment the video became available
- - `upload_date` (string): Video upload date (YYYYMMDD)
- - `uploader_id` (string): Nickname or id of the video uploader
- - `channel` (string): Full name of the channel the video is uploaded on
- - `channel_id` (string): Id of the channel
- - `location` (string): Physical location where the video was filmed
- - `duration` (numeric): Length of the video in seconds
- - `view_count` (numeric): How many users have watched the video on the platform
- - `like_count` (numeric): Number of positive ratings of the video
- - `dislike_count` (numeric): Number of negative ratings of the video
- - `repost_count` (numeric): Number of reposts of the video
- - `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
- - `comment_count` (numeric): Number of comments on the video
- - `age_limit` (numeric): Age restriction for the video (years)
- - `is_live` (boolean): Whether this video is a live stream or a fixed-length video
- - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL
- - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL
- - `format` (string): A human-readable description of the format
- - `format_id` (string): Format code specified by `--format`
- - `format_note` (string): Additional info about the format
- - `width` (numeric): Width of the video
- - `height` (numeric): Height of the video
- - `resolution` (string): Textual description of width and height
- - `tbr` (numeric): Average bitrate of audio and video in KBit/s
- - `abr` (numeric): Average audio bitrate in KBit/s
- - `acodec` (string): Name of the audio codec in use
- - `asr` (numeric): Audio sampling rate in Hertz
- - `vbr` (numeric): Average video bitrate in KBit/s
- - `fps` (numeric): Frame rate
- - `vcodec` (string): Name of the video codec in use
- - `container` (string): Name of the container format
- - `filesize` (numeric): The number of bytes, if known in advance
- - `filesize_approx` (numeric): An estimate for the number of bytes
- - `protocol` (string): The protocol that will be used for the actual download
- - `extractor` (string): Name of the extractor
- - `extractor_key` (string): Key name of the extractor
- - `epoch` (numeric): Unix epoch when creating the file
- - `autonumber` (numeric): Five-digit number that will be increased with each download, starting at zero
- - `playlist` (string): Name or id of the playlist that contains the video
- - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according to the total length of the playlist
- - `playlist_id` (string): Playlist identifier
- - `playlist_title` (string): Playlist title
- - `playlist_uploader` (string): Full name of the playlist uploader
- - `playlist_uploader_id` (string): Nickname or id of the playlist uploader
-
-Available for the video that belongs to some logical chapter or section:
-
- - `chapter` (string): Name or title of the chapter the video belongs to
- - `chapter_number` (numeric): Number of the chapter the video belongs to
- - `chapter_id` (string): Id of the chapter the video belongs to
-
-Available for the video that is an episode of some series or programme:
-
- - `series` (string): Title of the series or programme the video episode belongs to
- - `season` (string): Title of the season the video episode belongs to
- - `season_number` (numeric): Number of the season the video episode belongs to
- - `season_id` (string): Id of the season the video episode belongs to
- - `episode` (string): Title of the video episode
- - `episode_number` (numeric): Number of the video episode within a season
- - `episode_id` (string): Id of the video episode
-
-Available for the media that is a track or a part of a music album:
-
- - `track` (string): Title of the track
- - `track_number` (numeric): Number of the track within an album or a disc
- - `track_id` (string): Id of the track
- - `artist` (string): Artist(s) of the track
- - `genre` (string): Genre(s) of the track
- - `album` (string): Title of the album the track belongs to
- - `album_type` (string): Type of the album
- - `album_artist` (string): List of all artists appeared on the album
- - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to
- - `release_year` (numeric): Year (YYYY) when the album was released
-
-Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with `NA`.
-
-For example for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `youtube-dl test video` and id `BaW_jenozKcj`, this will result in a `youtube-dl test video-BaW_jenozKcj.mp4` file created in the current directory.
-
-For numeric sequences you can use numeric-related formatting, for example, `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.
-
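-Because output templates are ordinary Python string formatting, a quick way to preview a substitution is an interpreter session; the metadata values below are invented for illustration:
-
-```python
-# Hypothetical metadata, shaped like what an extractor reports
-info = {'title': 'youtube-dl test video', 'id': 'BaW_jenozKc', 'ext': 'mp4', 'view_count': 42}
-
-print('%(title)s-%(id)s.%(ext)s' % info)  # youtube-dl test video-BaW_jenozKc.mp4
-print('%(view_count)05d' % info)          # 00042
-```
-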
-Output templates can also contain arbitrary hierarchical path, e.g. `-o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s'` which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.
-
-To use percent literals in an output template use `%%`. To output to stdout use `-o -`.
-
-The current default template is `%(title)s-%(id)s.%(ext)s`.
-
-In some cases, you don't want special characters such as 中, spaces, or &, for example when transferring the downloaded filename to a Windows system or sending the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.
-
-#### Output template and Windows batch files
-
-If you are using an output template inside a Windows batch file then you must escape plain percent characters (`%`) by doubling, so that `-o "%(title)s-%(id)s.%(ext)s"` should become `-o "%%(title)s-%%(id)s.%%(ext)s"`. However you should not touch `%`'s that are not plain characters, e.g. environment variables for expansion should stay intact: `-o "C:\%HOMEPATH%\Desktop\%%(title)s.%%(ext)s"`.
-
-#### Output template examples
-
-Note that on Windows you may need to use double quotes instead of single.
-
-```bash
-$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc
-youtube-dl test video ''_ä↭𝕐.mp4 # All kinds of weird characters
-
-$ youtube-dl --get-filename -o '%(title)s.%(ext)s' BaW_jenozKc --restrict-filenames
-youtube-dl_test_video_.mp4 # A simple file name
-
-# Download YouTube playlist videos in separate directory indexed by video order in a playlist
-$ youtube-dl -o '%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re
-
-# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
-$ youtube-dl -o '%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s' https://www.youtube.com/user/TheLinuxFoundation/playlists
-
-# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
-$ youtube-dl -u user -p password -o '~/MyVideos/%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s' https://www.udemy.com/java-tutorial/
-
-# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
-$ youtube-dl -o "C:/MyVideos/%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" https://videomore.ru/kino_v_detalayah/5_sezon/367617
-
-# Stream the video being downloaded to stdout
-$ youtube-dl -o - BaW_jenozKc
-```
-
-# FORMAT SELECTION
-
-By default youtube-dl tries to download the best available quality, i.e. if you want the best quality you **don't need** to pass any special options, youtube-dl will guess it for you by **default**.
-
-But sometimes you may want to download in a different format, for example when you are on a slow or intermittent connection. The key mechanism for achieving this is so-called *format selection* based on which you can explicitly specify desired format, select formats based on some criterion or criteria, setup precedence and much more.
-
-The general syntax for format selection is `--format FORMAT` or shorter `-f FORMAT` where `FORMAT` is a *selector expression*, i.e. an expression that describes format or formats you would like to download.
-
-**tl;dr:** [navigate me to examples](#format-selection-examples).
-
-The simplest case is requesting a specific format, for example with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.
-
-You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.
-
-You can also use special names to select particular edge case formats:
-
- - `best`: Select the best quality format represented by a single file with video and audio.
- - `worst`: Select the worst quality format represented by a single file with video and audio.
- - `bestvideo`: Select the best quality video-only format (e.g. DASH video). May not be available.
- - `worstvideo`: Select the worst quality video-only format. May not be available.
- - `bestaudio`: Select the best quality audio-only format. May not be available.
- - `worstaudio`: Select the worst quality audio-only format. May not be available.
-
-For example, to download the worst quality video-only format you can use `-f worstvideo`.
-
-If you want to download multiple videos and they don't have the same formats available, you can specify the order of preference using slashes. Note that slash is left-associative, i.e. formats on the left hand side are preferred, for example `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.
-
-If you want to download several formats of the same video use a comma as a separator, e.g. `-f 22,17,18` will download all these three formats, of course if they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.
-
-You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"`).
-
-The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):
-
- - `filesize`: The number of bytes, if known in advance
- - `width`: Width of the video, if known
- - `height`: Height of the video, if known
- - `tbr`: Average bitrate of audio and video in KBit/s
- - `abr`: Average audio bitrate in KBit/s
- - `vbr`: Average video bitrate in KBit/s
- - `asr`: Audio sampling rate in Hertz
- - `fps`: Frame rate
-
-Also, filtering works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and the following string meta fields:
-
- - `ext`: File extension
- - `acodec`: Name of the audio codec in use
- - `vcodec`: Name of the video codec in use
- - `container`: Name of the container format
- - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
- - `format_id`: A short description of the format
-
-Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
-
-Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by a particular extractor, i.e. the metadata offered by the video hoster.
-
-Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "[height <=? 720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 KBit/s.
-
-You can merge the video and audio of two formats into a single file using `-f <video-format>+<audio-format>` (requires ffmpeg or avconv installed), for example `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg/avconv.
-
-Format selectors can also be grouped using parentheses, for example if you want to download the best mp4 and webm formats with a height lower than 480 you can use `-f '(mp4,webm)[height<480]'`.
-
-Since the end of April 2015 and version 2015.04.26, youtube-dl uses `-f bestvideo+bestaudio/best` as the default format selection (see [#5447](https://github.com/ytdl-org/youtube-dl/issues/5447), [#5456](https://github.com/ytdl-org/youtube-dl/issues/5456)). If ffmpeg or avconv are installed this results in downloading `bestvideo` and `bestaudio` separately and muxing them together into a single file giving the best overall quality available. Otherwise it falls back to `best` and results in downloading the best available quality served as a single file. `best` is also needed for videos that don't come from YouTube because they don't provide the audio and video in two different files. If you want to only download some DASH formats (for example if you are not interested in getting videos with a resolution higher than 1080p), you can add `-f bestvideo[height<=?1080]+bestaudio/best` to your configuration file. Note that if you use youtube-dl to stream to `stdout` (and most likely to pipe it to your media player then), i.e. you explicitly specify output template as `-o -`, youtube-dl still uses `-f best` format selection in order to start content delivery immediately to your player and not to wait until `bestvideo` and `bestaudio` are downloaded and muxed.
-
-If you want to preserve the old format selection behavior (prior to youtube-dl 2015.04.26), i.e. you want to download the best available quality media served as a single file, you should explicitly specify your choice with `-f best`. You may want to add it to the [configuration file](#configuration) in order not to type it every time you run youtube-dl.
-
-#### Format selection examples
-
-Note that on Windows you may need to use double quotes instead of single.
-
-```bash
-# Download best mp4 format available or any other best if no mp4 available
-$ youtube-dl -f 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best'
-
-# Download best format available but no better than 480p
-$ youtube-dl -f 'bestvideo[height<=480]+bestaudio/best[height<=480]'
-
-# Download best video only format but no bigger than 50 MB
-$ youtube-dl -f 'best[filesize<50M]'
-
-# Download best format available via direct link over HTTP/HTTPS protocol
-$ youtube-dl -f '(bestvideo+bestaudio/best)[protocol^=http]'
-
-# Download the best video format and the best audio format without merging them
-$ youtube-dl -f 'bestvideo,bestaudio' -o '%(title)s.f%(format_id)s.%(ext)s'
-```
-Note that in the last example, an output template is recommended as bestvideo and bestaudio may have the same file name.
-
-
-# VIDEO SELECTION
-
-Videos can be filtered by their upload date using the options `--date`, `--datebefore` or `--dateafter`. They accept dates in two formats:
-
- - Absolute dates: Dates in the format `YYYYMMDD`.
- - Relative dates: Dates in the format `(now|today)[+-][0-9](day|week|month|year)(s)?`
-
-Examples:
-
-```bash
-# Download only the videos uploaded in the last 6 months
-$ youtube-dl --dateafter now-6months
-
-# Download only the videos uploaded on January 1, 1970
-$ youtube-dl --date 19700101
-
-$ # Download only the videos uploaded in the 200x decade
-$ youtube-dl --dateafter 20000101 --datebefore 20091231
-```
-
-# FAQ
-
-### How do I update youtube-dl?
-
-If you've followed [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html), you can simply run `youtube-dl -U` (or, on Linux, `sudo youtube-dl -U`).
-
-If you have used pip, a simple `sudo pip install -U youtube-dl` is sufficient to update.
-
-If you have installed youtube-dl using a package manager like *apt-get* or *yum*, use the standard system update mechanism to update. Note that distribution packages are often outdated. As a rule of thumb, youtube-dl releases at least once a month, and often weekly or even daily. Simply go to https://yt-dl.org to find out the current version. Unfortunately, there is nothing we youtube-dl developers can do if your distribution serves a really outdated version. You can (and should) complain to your distribution in their bugtracker or support forum.
-
-As a last resort, you can also uninstall the version installed by your package manager and follow our manual installation instructions. For that, remove the distribution's package, with a line like
-
- sudo apt-get remove -y youtube-dl
-
-Afterwards, simply follow [our manual installation instructions](https://ytdl-org.github.io/youtube-dl/download.html):
-
-```
-sudo wget https://yt-dl.org/latest/youtube-dl -O /usr/local/bin/youtube-dl
-sudo chmod a+x /usr/local/bin/youtube-dl
-hash -r
-```
-
-Again, from then on you'll be able to update with `sudo youtube-dl -U`.
-
-### youtube-dl is extremely slow to start on Windows
-
-Add a file exclusion for `youtube-dl.exe` in Windows Defender settings.
-
-### I'm getting an error `Unable to extract OpenGraph title` on YouTube playlists
-
-YouTube changed their playlist format in March 2014 and later on, so you'll need at least youtube-dl 2014.07.25 to download all YouTube videos.
-
-If you have installed youtube-dl with a package manager, pip, setup.py or a tarball, please use that to update. Note that Ubuntu packages do not seem to get updated anymore. Since we are not affiliated with Ubuntu, there is little we can do. Feel free to [report bugs](https://bugs.launchpad.net/ubuntu/+source/youtube-dl/+filebug) to the [Ubuntu packaging people](mailto:ubuntu-motu@lists.ubuntu.com?subject=outdated%20version%20of%20youtube-dl) - all they have to do is update the package to a somewhat recent version. See above for a way to update.
-
-### I'm getting an error when trying to use output template: `error: using output template conflicts with using title, video ID or auto number`
-
-Make sure you are not using `-o` together with any of the options `-t`, `--title`, `--id`, `-A` or `--auto-number` set on the command line or in a configuration file. Remove the latter if present.
-
-### Do I always have to pass `-citw`?
-
-By default, youtube-dl intends to have the best options (incidentally, if you have a convincing case that these should be different, [please file an issue where you explain that](https://yt-dl.org/bug)). Therefore, it is unnecessary and sometimes harmful to copy long option strings from webpages. In particular, the only option out of `-citw` that is regularly useful is `-i`.
-
-### Can you please put the `-b` option back?
-
-Most people asking this question are not aware that youtube-dl now defaults to downloading the highest available quality as reported by YouTube, which will be 1080p or 720p in some cases, so you no longer need the `-b` option. For some specific videos, maybe YouTube does not report them to be available in a specific high quality format you're interested in. In that case, simply request it with the `-f` option and youtube-dl will try to download it.
-
-### I get HTTP error 402 when trying to download a video. What's this?
-
-Apparently YouTube requires you to pass a CAPTCHA test if you download too much. We're [considering providing a way to let you solve the CAPTCHA](https://github.com/ytdl-org/youtube-dl/issues/154), but at the moment, your best course of action is pointing a web browser to the youtube URL, solving the CAPTCHA, and restarting youtube-dl.
-
-### Do I need any other programs?
-
-youtube-dl works fine on its own on most sites. However, if you want to convert video/audio, you'll need [avconv](https://libav.org/) or [ffmpeg](https://www.ffmpeg.org/). On some sites - most notably YouTube - videos can be retrieved in a higher quality format without sound. youtube-dl will detect whether avconv/ffmpeg is present and automatically pick the best option.
-
-Videos or video formats streamed via RTMP protocol can only be downloaded when [rtmpdump](https://rtmpdump.mplayerhq.hu/) is installed. Downloading MMS and RTSP videos requires either [mplayer](https://mplayerhq.hu/) or [mpv](https://mpv.io/) to be installed.
-
-### I have downloaded a video but how can I play it?
-
-Once the video is fully downloaded, use any video player, such as [mpv](https://mpv.io/), [vlc](https://www.videolan.org/) or [mplayer](https://www.mplayerhq.hu/).
-
-### I extracted a video URL with `-g`, but it does not play on another machine / in my web browser.
-
-It depends a lot on the service. In many cases, requests for the video (to download/play it) must come from the same IP address and with the same cookies and/or HTTP headers. Use the `--cookies` option to write the required cookies into a file, and advise your downloader to read cookies from that file. Some sites also require a common user agent to be used, use `--dump-user-agent` to see the one in use by youtube-dl. You can also get necessary cookies and HTTP headers from JSON output obtained with `--dump-json`.
-
-It may be beneficial to use IPv6; in some cases, the restrictions are only applied to IPv4. Some services (sometimes only for a subset of videos) do not restrict the video URL by IP address, cookie, or user-agent, but these are the exception rather than the rule.
-
-Please bear in mind that some URL protocols are **not** supported by browsers out of the box, including RTMP. If you are using `-g`, your own downloader must support these as well.
-
-If you want to play the video on a machine that is not running youtube-dl, you can relay the video content from the machine that runs youtube-dl. You can use `-o -` to let youtube-dl stream a video to stdout, or simply allow the player to download the files written by youtube-dl in turn.
-
-### ERROR: no fmt_url_map or conn information found in video info
-
-YouTube has switched to a new video info format in July 2011 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
-
-### ERROR: unable to download video
-
-YouTube requires an additional signature since September 2012 which is not supported by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
-
-### Video URL contains an ampersand and I'm getting some strange output `[1] 2839` or `'v' is not recognized as an internal or external command`
-
-That's actually the output from your shell. Since ampersand is one of the special shell characters it's interpreted by the shell, preventing you from passing the whole URL to youtube-dl. To keep your shell from interpreting the ampersands (or any other special characters) you have to either put the whole URL in quotes or escape them with a backslash (which approach will work depends on your shell).
-
-For example if your URL is https://www.youtube.com/watch?t=4&v=BaW_jenozKc you should end up with following command:
-
-```youtube-dl 'https://www.youtube.com/watch?t=4&v=BaW_jenozKc'```
-
-or
-
-```youtube-dl https://www.youtube.com/watch?t=4\&v=BaW_jenozKc```
-
-For Windows you have to use the double quotes:
-
-```youtube-dl "https://www.youtube.com/watch?t=4&v=BaW_jenozKc"```
-
-### ExtractorError: Could not find JS function u'OF'
-
-In February 2015, the new YouTube player contained a character sequence in a string that was misinterpreted by old versions of youtube-dl. See [above](#how-do-i-update-youtube-dl) for how to update youtube-dl.
-
-### HTTP Error 429: Too Many Requests or 402: Payment Required
-
-These two error codes indicate that the service is blocking your IP address because of overuse. Contact the service and ask them to unblock your IP address, or - if you have acquired a whitelisted IP address already - use the [`--proxy` or `--source-address` options](#network-options) to select another IP address.
-
-### SyntaxError: Non-ASCII character
-
-The error
-
- File "youtube-dl", line 2
- SyntaxError: Non-ASCII character '\x93' ...
-
-means you're using an outdated version of Python. Please update to Python 2.6 or 2.7.
-
-### What is this binary file? Where has the code gone?
-
-Since June 2012 ([#342](https://github.com/ytdl-org/youtube-dl/issues/342)) youtube-dl is packed as an executable zipfile, simply unzip it (might need renaming to `youtube-dl.zip` first on some systems) or clone the git repository, as laid out above. If you modify the code, you can run it by executing the `__main__.py` file. To recompile the executable, run `make youtube-dl`.
-
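-A quick way to see this for yourself is to open the file with Python's `zipfile` module (a sketch; zip archives are read from the central directory at the end of the file, so the prepended `#!` line does not interfere):
-
-```python
-import zipfile
-
-# The youtube-dl "binary" is a shebang line followed by a zip archive,
-# so the standard zipfile module can list its contents directly.
-with zipfile.ZipFile('youtube-dl') as zf:
-    print(zf.namelist()[:3])
-```
-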
-### The exe throws an error due to missing `MSVCR100.dll`
-
-To run the exe you first need to install the [Microsoft Visual C++ 2010 Redistributable Package (x86)](https://www.microsoft.com/en-US/download/details.aspx?id=5555).
-
-### On Windows, how should I set up ffmpeg and youtube-dl? Where should I put the exe files?
-
-If you put youtube-dl and ffmpeg in the same directory that you're running the command from, it will work, but that's rather cumbersome.
-
-To make a different directory work - either for ffmpeg, or for youtube-dl, or for both - simply create the directory (say, `C:\bin`, or `C:\Users\<User name>\bin`), put all the executables directly in there, and then [set your PATH environment variable](https://www.java.com/en/download/help/path.xml) to include that directory.
-
-From then on, after restarting your shell, you will be able to access both youtube-dl and ffmpeg (and youtube-dl will be able to find ffmpeg) by simply typing `youtube-dl` or `ffmpeg`, no matter what directory you're in.
-
-### How do I put downloads into a specific folder?
-
-Use the `-o` option to specify an [output template](#output-template), for example `-o "/home/user/videos/%(title)s-%(id)s.%(ext)s"`. If you want this for all of your downloads, put the option into your [configuration file](#configuration).
-
-### How do I download a video starting with a `-`?
-
-Either prepend `https://www.youtube.com/watch?v=` or separate the ID from the options with `--`:
-
- youtube-dl -- -wNyEUrxzFU
- youtube-dl "https://www.youtube.com/watch?v=-wNyEUrxzFU"
-
-### How do I pass cookies to youtube-dl?
-
-Use the `--cookies` option, for example `--cookies /path/to/cookies/file.txt`.
-
-In order to extract cookies from browser use any conforming browser extension for exporting cookies. For example, [cookies.txt](https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) (for Chrome) or [cookies.txt](https://addons.mozilla.org/en-US/firefox/addon/cookies-txt/) (for Firefox).
-
-Note that the cookies file must be in Mozilla/Netscape format and the first line of the cookies file must be either `# HTTP Cookie File` or `# Netscape HTTP Cookie File`. Make sure you have correct [newline format](https://en.wikipedia.org/wiki/Newline) in the cookies file and convert newlines if necessary to correspond with your OS, namely `CRLF` (`\r\n`) for Windows and `LF` (`\n`) for Unix and Unix-like systems (Linux, macOS, etc.). `HTTP Error 400: Bad Request` when using `--cookies` is a good sign of invalid newline format.
-
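-For reference, a minimal file in this format could look as follows (fields are tab-separated; the cookie values here are invented):
-
-```
-# Netscape HTTP Cookie File
-.youtube.com	TRUE	/	FALSE	1893456000	PREF	hl=en
-```
-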
-Passing cookies to youtube-dl is a good way to work around login when a particular extractor does not implement it explicitly. Another use case is working around [CAPTCHA](https://en.wikipedia.org/wiki/CAPTCHA) some websites require you to solve in particular cases in order to get access (e.g. YouTube, CloudFlare).
-
-### How do I stream directly to media player?
-
-You will first need to tell youtube-dl to stream media to stdout with `-o -`, and also tell your media player to read from stdin (it must be capable of this for streaming) and then pipe the former to the latter. For example, streaming to [vlc](https://www.videolan.org/) can be achieved with:
-
- youtube-dl -o - "https://www.youtube.com/watch?v=BaW_jenozKcj" | vlc -
-
-### How do I download only new videos from a playlist?
-
-Use the download-archive feature. With this feature you should initially download the complete playlist with `--download-archive /path/to/download/archive/file.txt` which will record identifiers of all the videos in a special file. Each subsequent run with the same `--download-archive` will download only new videos and skip all videos that have been downloaded before. Note that only successful downloads are recorded in the file.
-
-For example, at first,
-
- youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
-
-will download the complete `PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re` playlist and create a file `archive.txt`. Each subsequent run will only download new videos if any:
-
- youtube-dl --download-archive archive.txt "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"
-
-### Should I add `--hls-prefer-native` into my config?
-
-When youtube-dl detects an HLS video, it can download it either with the built-in downloader or ffmpeg. Since many HLS streams are slightly invalid and ffmpeg/youtube-dl each handle some invalid cases better than the other, there is an option to switch the downloader if needed.
-
-When youtube-dl knows that one particular downloader works better for a given website, that downloader will be picked. Otherwise, youtube-dl will pick the best downloader for general compatibility, which at the moment happens to be ffmpeg. This choice may change in future versions of youtube-dl, with improvements of the built-in downloader and/or ffmpeg.
-
-In particular, the generic extractor (used when your website is not in the [list of supported sites by youtube-dl](https://ytdl-org.github.io/youtube-dl/supportedsites.html)) cannot mandate one specific downloader.
-
-If you put either `--hls-prefer-native` or `--hls-prefer-ffmpeg` into your configuration, a different subset of videos will fail to download correctly. Instead, it is much better to [file an issue](https://yt-dl.org/bug) or a pull request which details why the native or the ffmpeg HLS downloader is a better choice for your use case.
-
-### Can you add support for this anime video site, or site which shows current movies for free?
-
-As a matter of policy (as well as legality), youtube-dl does not include support for services that specialize in infringing copyright. As a rule of thumb, if you cannot easily find a video that the service is quite obviously allowed to distribute (i.e. that has been uploaded by the creator, the creator's distributor, or is published under a free license), the service is probably unfit for inclusion to youtube-dl.
-
-A note on the service that they don't host the infringing content, but just link to those who do, is evidence that the service should **not** be included into youtube-dl. The same goes for any DMCA note when the whole front page of the service is filled with videos they are not allowed to distribute. A "fair use" note is equally unconvincing if the service shows copyright-protected videos in full without authorization.
-
-Support requests for services that **do** purchase the rights to distribute their content are perfectly fine though. If in doubt, you can simply include a source that mentions the legitimate purchase of content.
-
-### How can I speed up work on my issue?
-
-(Also known as: Help, my important issue is not being solved!) The youtube-dl core developer team is quite small. While we do our best to solve as many issues as possible, sometimes that can take quite a while. To speed up resolution of your issue, here's what you can do:
-
-First of all, please do report the issue [at our issue tracker](https://yt-dl.org/bugs). That allows us to coordinate all efforts by users and developers, and serves as a unified point. Unfortunately, the youtube-dl project has grown too large to use personal email as an effective communication channel.
-
-Please read the [bug reporting instructions](#bugs) below. A lot of bugs lack all the necessary information. If you can, offer proxy, VPN, or shell access to the youtube-dl developers. If you are able to, test the issue from multiple computers in multiple countries to exclude local censorship or misconfiguration issues.
-
-If nobody is interested in solving your issue, you are welcome to take matters into your own hands and submit a pull request (or coerce/pay somebody else to do so).
-
-Feel free to bump the issue from time to time by writing a small comment ("Issue is still present in youtube-dl version ...from France, but fixed from Belgium"), but please not more than once a month. Please do not declare your issue as `important` or `urgent`.
-
-### How can I detect whether a given URL is supported by youtube-dl?
-
-For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from https://example.com/video/1234567 to https://example.com/v/1234567 ) and youtube-dl reports a URL of a service in that list as unsupported. In that case, simply report a bug.
-
-It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that hosts the video itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor.
-
-If you want to find out whether a given URL is supported, simply call youtube-dl with it. If you get no videos back, chances are the URL is either not referring to a video or unsupported. You can find out which by examining the output (if you run youtube-dl on the console) or catching an `UnsupportedError` exception if you run it from a Python program.
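-
-If you go the Python route, a minimal sketch looks like this (the URL is a placeholder; depending on the youtube-dl version, extractor errors may also arrive wrapped in a `DownloadError`):
-
-```python
-import youtube_dl
-from youtube_dl.utils import DownloadError, UnsupportedError
-
-ydl = youtube_dl.YoutubeDL({'quiet': True})
-try:
-    # download=False: only probe whether metadata can be extracted
-    ydl.extract_info('https://example.com/video/1234567', download=False)
-except UnsupportedError:
-    print('no extractor matches this URL')
-except DownloadError as e:
-    # other extraction failures surface here
-    print('extraction failed: %s' % e)
-```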
-
-# Why do I need to go through that much red tape when filing bugs?
-
-Before we had the issue template, despite our extensive [bug reporting instructions](#bugs), about 80% of the issue reports we got were useless, for instance because people used ancient versions hundreds of releases old, because of simple syntactic errors (not in youtube-dl but in general shell usage), because the problem was already reported multiple times before, because people did not actually read an error message, even if it said "please install ffmpeg", because people did not mention the URL they were trying to download and many more simple, easy-to-avoid problems, many of which were totally unrelated to youtube-dl.
-
-youtube-dl is an open-source project manned by too few volunteers, so we'd rather spend time fixing bugs where we are certain none of those simple problems apply, and where we can be reasonably confident to be able to reproduce the issue without asking the reporter repeatedly. As such, the output of `youtube-dl -v YOUR_URL_HERE` is really all that's required to file an issue. The issue template also guides you through some basic steps you can do, such as checking that your version of youtube-dl is current.
-
-# DEVELOPER INSTRUCTIONS
-
-Most users do not need to build youtube-dl and can [download the builds](https://ytdl-org.github.io/youtube-dl/download.html) or get them from their distribution.
-
-To run youtube-dl as a developer, you don't need to build anything either. Simply execute
-
- python -m youtube_dl
-
-To run the tests, simply invoke your favorite test runner, or execute a test file directly; any of the following work:
-
- python -m unittest discover
- python test/test_download.py
- nosetests
-
-See item 6 of the [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor-specific test cases.
-
-If you want to create a build of youtube-dl yourself, you'll need
-
-* python
-* make (only GNU make is supported)
-* pandoc
-* zip
-* nosetests
-
-### Adding support for a new site
-
-If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](README.md#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free)**. youtube-dl does **not support** such sites thus pull requests adding support for them **will be rejected**.
-
-After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`):
-
-1. [Fork this repository](https://github.com/ytdl-org/youtube-dl/fork)
-2. Check out the source code with:
-
- git clone git@github.com:YOUR_GITHUB_USERNAME/youtube-dl.git
-
-3. Start a new git branch with
-
- cd youtube-dl
- git checkout -b yourextractor
-
-4. Start with this simple template and save it to `youtube_dl/extractor/yourextractor.py`:
-
- ```python
- # coding: utf-8
- from __future__ import unicode_literals
-
- from .common import InfoExtractor
-
-
- class YourExtractorIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'https://yourextractor.com/watch/42',
- 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
- 'info_dict': {
- 'id': '42',
- 'ext': 'mp4',
- 'title': 'Video title goes here',
- 'thumbnail': r're:^https?://.*\.jpg$',
- # TODO more properties, either as:
- # * A value
- # * MD5 checksum; start the string with md5:
- # * A regular expression; start the string with re:
- # * Any Python type (for example int or float)
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- # TODO more code goes here, for example ...
- title = self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': self._og_search_description(webpage),
- 'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
- # TODO more properties (see youtube_dl/extractor/common.py)
- }
- ```
-5. Add an import in [`youtube_dl/extractor/extractors.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/extractors.py).
-6. Run `python test/test_download.py TestDownload.test_YourExtractor`. This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, then rename ``_TEST`` to ``_TESTS`` and make it into a list of dictionaries (see the sketch after this list). The tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not counted in this numbering.
-7. Have a look at [`youtube_dl/extractor/common.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303). Add tests and code for as many as you want.
-8. Make sure your code follows [youtube-dl coding conventions](#youtube-dl-coding-conventions) and check the code with [flake8](http://flake8.pycqa.org/en/latest/index.html#quickstart):
-
- $ flake8 youtube_dl/extractor/yourextractor.py
-
-9. Make sure your code works under all [Python](https://www.python.org/) versions claimed supported by youtube-dl, namely 2.6, 2.7, and 3.2+.
-10. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files and [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
-
- $ git add youtube_dl/extractor/extractors.py
- $ git add youtube_dl/extractor/yourextractor.py
- $ git commit -m '[yourextractor] Add new extractor'
- $ git push origin yourextractor
-
-11. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.
-
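-As referenced in step 6, here is a hypothetical sketch of what a `_TESTS` list for `yourextractor` might look like (all values are placeholders):
-
-```python
-_TESTS = [{
-    'url': 'https://yourextractor.com/watch/42',
-    'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)',
-    'info_dict': {
-        'id': '42',
-        'ext': 'mp4',
-        'title': 'Video title goes here',
-    },
-}, {
-    # only_matching tests merely check _VALID_URL against the URL;
-    # nothing is downloaded and they are not counted in the numbering
-    'url': 'https://yourextractor.com/embed/42',
-    'only_matching': True,
-}]
-```
-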
-In any case, thank you very much for your contributions!
-
-## youtube-dl coding conventions
-
-This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.
-
-Extractors are very fragile by nature, since they depend on the layout of the source data provided by 3rd party media hosters, which is out of your control and tends to change. As an extractor implementer your task is not only to write code that extracts media links and metadata correctly, but also to minimize dependency on the source's layout and even to anticipate potential future changes. This is important because it keeps the extractor from breaking on minor layout changes and thus keeps old youtube-dl versions working. Even though such breakage is easily fixed by shipping a new version of youtube-dl with the fix incorporated, all previous versions remain broken in all repositories and distros' packages, which may not be so prompt in fetching the update from us. Needless to say, some non-rolling-release distros may never receive an update at all.
-
-### Mandatory and optional metafields
-
-For extraction to work youtube-dl relies on the metadata your extractor extracts and provides, expressed as an [information dictionary](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L94-L303) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction:
-
- - `id` (media identifier)
- - `title` (media title)
- - `url` (media download URL) or `formats`
-
-In fact only the last option is technically mandatory (i.e. if you can't figure out the download location of the media, the extraction does not make any sense). But by convention youtube-dl also treats `id` and `title` as mandatory. Thus the aforementioned metafields are the critical data without which extraction does not make any sense; if any of them fails to be extracted, the extractor is considered completely broken.
-
-[Any field](https://github.com/ytdl-org/youtube-dl/blob/7f41a598b3fba1bcab2817de64a08941200aa3c8/youtube_dl/extractor/common.py#L188-L303) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** of situations where the sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof**, so as not to break the extraction of the general-purpose mandatory fields.
-
-#### Example
-
-Say you have some source dictionary `meta` that you've fetched as JSON with an HTTP request and it has a key `summary`:
-
-```python
-meta = self._download_json(url, video_id)
-```
-
-Assume at this point `meta`'s layout is:
-
-```python
-{
- ...
- "summary": "some fancy summary text",
- ...
-}
-```
-
-Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field, you should be prepared for this key to be missing from the `meta` dict, so you should extract it like:
-
-```python
-description = meta.get('summary') # correct
-```
-
-and not like:
-
-```python
-description = meta['summary'] # incorrect
-```
-
-The latter will break the extraction process with a `KeyError` if `summary` disappears from `meta` at some later time, while with the former approach extraction will just go ahead with `description` set to `None`, which is perfectly fine (remember that `None` is equivalent to the absence of data).
-
-Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:
-
-```python
-description = self._search_regex(
- r'<span[^>]+id="title"[^>]*>([^<]+)<',
- webpage, 'description', fatal=False)
-```
-
-With `fatal` set to `False`, if `_search_regex` fails to extract `description` it will emit a warning and continue extraction.
-
-You can also pass `default=<some fallback value>`, for example:
-
-```python
-description = self._search_regex(
- r'<span[^>]+id="title"[^>]*>([^<]+)<',
- webpage, 'description', default=None)
-```
-
-On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.
-
-### Provide fallbacks
-
-When extracting metadata, try to do so from multiple sources. For example, if `title` is present in several places, try extracting it from at least some of them. This makes the extractor more future-proof in case some of the sources become unavailable.
-
-#### Example
-
-Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field, you should end up with something like:
-
-```python
-title = meta['title']
-```
-
-If `title` disappears from `meta` in the future due to some changes on the hoster's side, the extraction will fail, since `title` is mandatory. That's expected.
-
-Assume that you have another source you can extract `title` from, for example the `og:title` HTML meta tag of `webpage`. In this case you can provide a fallback:
-
-```python
-title = meta.get('title') or self._og_search_title(webpage)
-```
-
-This code will try to extract `title` from `meta` first and, if that fails, fall back to extracting `og:title` from the webpage.
-
-### Regular expressions
-
-#### Don't capture groups you don't use
-
-A capturing group must be an indication that the group is used somewhere in the code. Any group that is not used must be non-capturing.
-
-##### Example
-
-Don't capture the id attribute name here, since you can't use it for anything anyway.
-
-Correct:
-
-```python
-r'(?:id|ID)=(?P<id>\d+)'
-```
-
-Incorrect:
-```python
-r'(id|ID)=(?P<id>\d+)'
-```
-
-
-#### Make regular expressions relaxed and flexible
-
-When using regular expressions, try to write them fuzzy, relaxed and flexible: skip insignificant parts that are more likely to change, allow both single and double quotes for quoted values, and so on.
-
-##### Example
-
-Say you need to extract `title` from the following HTML code:
-
-```html
-<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
-```
-
-The code for that task should look similar to:
-
-```python
-title = self._search_regex(
- r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
-```
-
-Or even better:
-
-```python
-title = self._search_regex(
- r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
- webpage, 'title', group='title')
-```
-
-Note how this tolerates potential changes in the `style` attribute's value or a switch from double quotes to single quotes for the `class` attribute.
-
-The code definitely should not look like:
-
-```python
-title = self._search_regex(
- r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
- webpage, 'title', group='title')
-```
-
-### Long lines policy
-
-There is a soft limit to keep lines of code under 80 characters long. Respect it if possible, but not where it would make readability and code maintenance worse.
-
-For example, you should **never** split long string literals like URLs or other often-copied entities over multiple lines to fit this limit:
-
-Correct:
-
-```python
-'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
-```
-
-Incorrect:
-
-```python
-'https://www.youtube.com/watch?v=FqZTN594JQw&list='
-'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
-```
-
-### Inline values
-
-Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables that are used only once and moving them to an opposite part of the extractor file, which makes the linear flow difficult to read.
-
-#### Example
-
-Correct:
-
-```python
-title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
-```
-
-Incorrect:
-
-```python
-TITLE_RE = r'<title>([^<]+)</title>'
-# ...some lines of code...
-title = self._html_search_regex(TITLE_RE, webpage, 'title')
-```
-
-### Collapse fallbacks
-
-Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
-
-#### Example
-
-Good:
-
-```python
-description = self._html_search_meta(
- ['og:description', 'description', 'twitter:description'],
- webpage, 'description', default=None)
-```
-
-Unwieldy:
-
-```python
-description = (
- self._og_search_description(webpage, default=None)
- or self._html_search_meta('description', webpage, default=None)
- or self._html_search_meta('twitter:description', webpage, default=None))
-```
-
-Methods supporting a list of patterns are `_search_regex`, `_html_search_regex`, `_og_search_property` and `_html_search_meta`; see the sketch below.
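-
-For instance, `_search_regex` accepts a list of patterns and tries them in order until one matches (a hypothetical sketch):
-
-```python
-title = self._search_regex(
-    [r'<h1[^>]*>([^<]+)</h1>',
-     r'<meta[^>]+name="title"[^>]+content="([^"]+)"'],
-    webpage, 'title')
-```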
-
-### Trailing parentheses
-
-Always move trailing parentheses after the last argument.
-
-#### Example
-
-Correct:
-
-```python
- lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
- list)
-```
-
-Incorrect:
-
-```python
- lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
- list,
-)
-```
-
-### Use convenience conversion and parsing functions
-
-Wrap all extracted numeric data in the safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string-to-number conversions as well.
-
-Use `url_or_none` for safe URL processing.
-
-Use `try_get` for safe metadata extraction from parsed JSON.
-
-Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta field extraction, `parse_resolution` for `resolution` extraction, `parse_duration` for `duration` extraction, and `parse_age_limit` for `age_limit` extraction.
-
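-As a brief illustration (the `meta` keys below are hypothetical), these helpers keep extraction tolerant of missing or oddly formatted values:
-
-```python
-from youtube_dl.utils import (
-    parse_age_limit,
-    parse_count,
-    parse_duration,
-    parse_filesize,
-    unified_strdate,
-    unified_timestamp,
-    url_or_none,
-)
-
-upload_date = unified_strdate(meta.get('publishedAt'))  # -> '20200101' or None
-timestamp = unified_timestamp(meta.get('publishedAt'))  # -> POSIX timestamp or None
-duration = parse_duration(meta.get('duration'))         # e.g. '4:26' -> 266.0
-filesize = parse_filesize(meta.get('size'))             # e.g. '5.4 MiB' -> bytes
-view_count = parse_count(meta.get('views'))             # e.g. '1.2M' -> 1200000
-age_limit = parse_age_limit(meta.get('rating'))         # e.g. 'PG-13' -> 13
-thumbnail = url_or_none(meta.get('thumb'))              # rejects invalid URLs
-```
-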
-Explore [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py) for more useful convenience functions.
-
-#### More examples
-
-##### Safely extract optional description from parsed JSON
-```python
-description = try_get(response, lambda x: x['result']['video'][0]['summary'], compat_str)
-```
-
-##### Safely extract more optional metadata
-```python
-video = try_get(response, lambda x: x['result']['video'][0], dict) or {}
-description = video.get('summary')
-duration = float_or_none(video.get('durationMs'), scale=1000)
-view_count = int_or_none(video.get('views'))
-```
-
-# EMBEDDING YOUTUBE-DL
-
-youtube-dl makes the best effort to be a good command-line program, and thus should be callable from any programming language. If you encounter any problems parsing its output, feel free to [create a report](https://github.com/ytdl-org/youtube-dl/issues/new).
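-
-For example, one robust way to consume youtube-dl's output from any language is its `-j`/`--dump-json` mode, which prints one JSON info dict per video to stdout. A small sketch (using the usual example URL):
-
-```python
-import json
-import subprocess
-
-# -j prints the info dict as JSON without downloading the video
-out = subprocess.check_output(
-    ['youtube-dl', '-j', 'https://www.youtube.com/watch?v=BaW_jenozKc'])
-info = json.loads(out.decode('utf-8'))
-print(info['id'], info['title'])
-```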
-
-From a Python program, you can embed youtube-dl in a more powerful fashion, like this:
-
-```python
-from __future__ import unicode_literals
-import youtube_dl
-
-ydl_opts = {}
-with youtube_dl.YoutubeDL(ydl_opts) as ydl:
- ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
-```
-
-Most likely, you'll want to use various options. For a list of options available, have a look at [`youtube_dl/YoutubeDL.py`](https://github.com/ytdl-org/youtube-dl/blob/3e4cedf9e8cd3157df2457df7274d0c842421945/youtube_dl/YoutubeDL.py#L137-L312). For a start, if you want to intercept youtube-dl's output, set a `logger` object.
-
-Here's a more complete example of a program that outputs only errors (and a short message after the download is finished), and downloads/converts the video to an mp3 file:
-
-```python
-from __future__ import unicode_literals
-import youtube_dl
-
-
-class MyLogger(object):
- def debug(self, msg):
- pass
-
- def warning(self, msg):
- pass
-
- def error(self, msg):
- print(msg)
-
-
-def my_hook(d):
- if d['status'] == 'finished':
- print('Done downloading, now converting ...')
-
-
-ydl_opts = {
- 'format': 'bestaudio/best',
- 'postprocessors': [{
- 'key': 'FFmpegExtractAudio',
- 'preferredcodec': 'mp3',
- 'preferredquality': '192',
- }],
- 'logger': MyLogger(),
- 'progress_hooks': [my_hook],
-}
-with youtube_dl.YoutubeDL(ydl_opts) as ydl:
- ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
-```
-
-# BUGS
-
-Bugs and suggestions should be reported at: <https://github.com/ytdl-org/youtube-dl/issues>. Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in the IRC channel [#youtube-dl](irc://chat.freenode.net/#youtube-dl) on freenode ([webchat](https://webchat.freenode.net/?randomnick=1&channels=youtube-dl)).
-
-**Please include the full output of youtube-dl when run with `-v`**, i.e. **add** `-v` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this:
-```
-$ youtube-dl -v <your command line>
-[debug] System config: []
-[debug] User config: []
-[debug] Command-line args: [u'-v', u'https://www.youtube.com/watch?v=BaW_jenozKcj']
-[debug] Encodings: locale cp1251, fs mbcs, out cp866, pref cp1251
-[debug] youtube-dl version 2015.12.06
-[debug] Git HEAD: 135392e
-[debug] Python version 2.6.6 - Windows-2003Server-5.2.3790-SP2
-[debug] exe versions: ffmpeg N-75573-g1d0487f, ffprobe N-75573-g1d0487f, rtmpdump 2.4
-[debug] Proxy map: {}
-...
-```
-**Do not post screenshots of verbose logs; only plain text is acceptable.**
-
-The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore do not get solved in short order, if ever.
-
-Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):
-
-### Is the description of the issue itself sufficient?
-
-We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources. Many contributors, including myself, are also not native speakers, so we may misread some parts.
-
-So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious
-
-- What the problem is
-- How it could be fixed
-- What your proposed solution would look like
-
-If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. As a committer myself, I often get frustrated by these issues, since the only possible way for me to move forward on them is to ask for clarification over and over.
-
-For bug reports, this means that your report should contain the *complete* output of youtube-dl when called with the `-v` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information.
-
-If your server has multiple IPs or you suspect censorship, adding `--call-home` may be a good idea to get more diagnostics. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--dump-pages` (warning: this will yield a rather large output, redirect it to the file `log.txt` by adding `>log.txt 2>&1` to your command-line) or upload the `.dump` files you get when you add `--write-pages` [somewhere](https://gist.github.com/).
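-
-For example, a typical diagnostic invocation could look like this (example URL; the redirection syntax is for POSIX shells):
-
-```
-$ youtube-dl -v --dump-pages 'https://www.youtube.com/watch?v=BaW_jenozKc' >log.txt 2>&1
-```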
-
-**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.
-
-### Are you using the latest version?
-
-Before reporting any issue, type `youtube-dl -U`. This should report that you're up-to-date. About 20% of the reports we receive are already fixed, but people are using outdated versions. This goes for feature requests as well.
-
-### Is the issue already documented?
-
-Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/ytdl-org/youtube-dl/search?type=Issues) of this repository. If there is an issue, feel free to write something along the lines of "This affects me as well, with version 2015.01.01. Here is some more information on the issue: ...". While some issues may be old, a new post in them often spurs rapid activity.
-
-### Why are existing options not enough?
-
-Before requesting a new feature, please have a quick peek at [the list of supported options](https://github.com/ytdl-org/youtube-dl/blob/master/README.md#options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
-
-### Is there enough context in your bug report?
-
-People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) into a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: one simple, and one impossible (or extremely complicated).
-
-We are then presented with a very complicated request when the original problem could be solved far more easily, e.g. by recording the downloaded video IDs in a separate file. To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.
-
-### Does the issue involve one problem, and one problem only?
-
-Some of our users seem to think there is a limit on the number of issues they can or should open. There is no such limit. While it may seem appealing to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering, since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.
-
-In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White House podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of youtube-dl that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.
-
-### Is anyone going to need the feature?
-
-Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.
-
-### Is your question about youtube-dl?
-
-It may sound strange, but some bug reports we receive are completely unrelated to youtube-dl and relate to a different, or even the reporter's own, application. Please make sure that you are actually using youtube-dl. If you are using a UI for youtube-dl, report the bug to the maintainer of the actual application providing the UI. On the other hand, if your UI for youtube-dl fails in some way you believe is related to youtube-dl, by all means, go ahead and report the bug.
-
-# COPYRIGHT
-
-youtube-dl is released into the public domain by the copyright holders.
-
-This README file was originally written by [Daniel Bolton](https://github.com/dbbolton) and is likewise released into the public domain.
diff --git a/bin/youtube-dl b/bin/youtube-dl
deleted file mode 100755
index fc3cc8ad8..000000000
--- a/bin/youtube-dl
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-
-import youtube_dl
-
-if __name__ == '__main__':
- youtube_dl.main()
diff --git a/devscripts/bash-completion.in b/devscripts/bash-completion.in
index 28bd23727..1bf41f2cc 100644
--- a/devscripts/bash-completion.in
+++ b/devscripts/bash-completion.in
@@ -1,4 +1,4 @@
-__youtube_dl()
+__youtube_dlc()
{
local cur prev opts fileopts diropts keywords
COMPREPLY=()
@@ -26,4 +26,4 @@ __youtube_dl()
fi
}
-complete -F __youtube_dl youtube-dl
+complete -F __youtube_dlc youtube-dlc
diff --git a/devscripts/bash-completion.py b/devscripts/bash-completion.py
index 3d1391334..d68c9b1cc 100755
--- a/devscripts/bash-completion.py
+++ b/devscripts/bash-completion.py
@@ -6,9 +6,9 @@ from os.path import dirname as dirn
import sys
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
-import youtube_dl
+import youtube_dlc
-BASH_COMPLETION_FILE = "youtube-dl.bash-completion"
+BASH_COMPLETION_FILE = "youtube-dlc.bash-completion"
BASH_COMPLETION_TEMPLATE = "devscripts/bash-completion.in"
@@ -26,5 +26,5 @@ def build_completion(opt_parser):
f.write(filled_template)
-parser = youtube_dl.parseOpts()[0]
+parser = youtube_dlc.parseOpts()[0]
build_completion(parser)
diff --git a/devscripts/buildserver.py b/devscripts/buildserver.py
index 4a4295ba9..62dbd2cb1 100644
--- a/devscripts/buildserver.py
+++ b/devscripts/buildserver.py
@@ -12,7 +12,7 @@ import traceback
import os.path
sys.path.insert(0, os.path.dirname(os.path.dirname((os.path.abspath(__file__)))))
-from youtube_dl.compat import (
+from youtube_dlc.compat import (
compat_input,
compat_http_server,
compat_str,
@@ -325,7 +325,7 @@ class YoutubeDLBuilder(object):
authorizedUsers = ['fraca7', 'phihag', 'rg3', 'FiloSottile', 'ytdl-org']
def __init__(self, **kwargs):
- if self.repoName != 'youtube-dl':
+ if self.repoName != 'youtube-dlc':
raise BuildError('Invalid repository "%s"' % self.repoName)
if self.user not in self.authorizedUsers:
raise HTTPError('Unauthorized user "%s"' % self.user, 401)
diff --git a/devscripts/check-porn.py b/devscripts/check-porn.py
index 740f04de0..68a33d823 100644
--- a/devscripts/check-porn.py
+++ b/devscripts/check-porn.py
@@ -15,8 +15,8 @@ import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import gettestcases
-from youtube_dl.utils import compat_urllib_parse_urlparse
-from youtube_dl.utils import compat_urllib_request
+from youtube_dlc.utils import compat_urllib_parse_urlparse
+from youtube_dlc.utils import compat_urllib_request
if len(sys.argv) > 1:
METHOD = 'LIST'
diff --git a/devscripts/create-github-release.py b/devscripts/create-github-release.py
index 428111b3f..4714d81a6 100644
--- a/devscripts/create-github-release.py
+++ b/devscripts/create-github-release.py
@@ -1,7 +1,6 @@
#!/usr/bin/env python
from __future__ import unicode_literals
-import base64
import io
import json
import mimetypes
@@ -13,14 +12,13 @@ import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.compat import (
+from youtube_dlc.compat import (
compat_basestring,
- compat_input,
compat_getpass,
compat_print,
compat_urllib_request,
)
-from youtube_dl.utils import (
+from youtube_dlc.utils import (
make_HTTPS_handler,
sanitized_Request,
)
@@ -40,28 +38,20 @@ class GitHubReleaser(object):
try:
info = netrc.netrc().authenticators(self._NETRC_MACHINE)
if info is not None:
- self._username = info[0]
- self._password = info[2]
+ self._token = info[2]
compat_print('Using GitHub credentials found in .netrc...')
return
else:
compat_print('No GitHub credentials found in .netrc')
except (IOError, netrc.NetrcParseError):
compat_print('Unable to parse .netrc')
- self._username = compat_input(
- 'Type your GitHub username or email address and press [Return]: ')
- self._password = compat_getpass(
- 'Type your GitHub password and press [Return]: ')
+ self._token = compat_getpass(
+ 'Type your GitHub PAT (personal access token) and press [Return]: ')
def _call(self, req):
if isinstance(req, compat_basestring):
req = sanitized_Request(req)
- # Authorizing manually since GitHub does not response with 401 with
- # WWW-Authenticate header set (see
- # https://developer.github.com/v3/#basic-authentication)
- b64 = base64.b64encode(
- ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii')
- req.add_header('Authorization', 'Basic %s' % b64)
+ req.add_header('Authorization', 'token %s' % self._token)
response = self._opener.open(req).read().decode('utf-8')
return json.loads(response)
@@ -108,7 +98,7 @@ def main():
releaser = GitHubReleaser()
new_release = releaser.create_release(
- version, name='youtube-dl %s' % version, body=body)
+ version, name='youtube-dlc %s' % version, body=body)
release_id = new_release['id']
for asset in os.listdir(build_path):
diff --git a/devscripts/fish-completion.in b/devscripts/fish-completion.in
index eb79765da..4f08b6d4a 100644
--- a/devscripts/fish-completion.in
+++ b/devscripts/fish-completion.in
@@ -2,4 +2,4 @@
{{commands}}
-complete --command youtube-dl --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
+complete --command youtube-dlc --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
diff --git a/devscripts/fish-completion.py b/devscripts/fish-completion.py
index 51d19dd33..a27ef44f8 100755
--- a/devscripts/fish-completion.py
+++ b/devscripts/fish-completion.py
@@ -7,10 +7,10 @@ from os.path import dirname as dirn
import sys
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
-import youtube_dl
-from youtube_dl.utils import shell_quote
+import youtube_dlc
+from youtube_dlc.utils import shell_quote
-FISH_COMPLETION_FILE = 'youtube-dl.fish'
+FISH_COMPLETION_FILE = 'youtube-dlc.fish'
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'
EXTRA_ARGS = {
@@ -30,7 +30,7 @@ def build_completion(opt_parser):
for group in opt_parser.option_groups:
for option in group.option_list:
long_option = option.get_opt_string().strip('-')
- complete_cmd = ['complete', '--command', 'youtube-dl', '--long-option', long_option]
+ complete_cmd = ['complete', '--command', 'youtube-dlc', '--long-option', long_option]
if option._short_opts:
complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
if option.help != optparse.SUPPRESS_HELP:
@@ -45,5 +45,5 @@ def build_completion(opt_parser):
f.write(filled_template)
-parser = youtube_dl.parseOpts()[0]
+parser = youtube_dlc.parseOpts()[0]
build_completion(parser)
diff --git a/devscripts/generate_aes_testdata.py b/devscripts/generate_aes_testdata.py
index e3df42cc2..c89bb547e 100644
--- a/devscripts/generate_aes_testdata.py
+++ b/devscripts/generate_aes_testdata.py
@@ -7,8 +7,8 @@ import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.utils import intlist_to_bytes
-from youtube_dl.aes import aes_encrypt, key_expansion
+from youtube_dlc.utils import intlist_to_bytes
+from youtube_dlc.aes import aes_encrypt, key_expansion
secret_msg = b'Secret message goes here'
diff --git a/devscripts/gh-pages/add-version.py b/devscripts/gh-pages/add-version.py
index 867ea0048..04588a5ee 100755
--- a/devscripts/gh-pages/add-version.py
+++ b/devscripts/gh-pages/add-version.py
@@ -22,9 +22,9 @@ if 'signature' in versions_info:
new_version = {}
filenames = {
- 'bin': 'youtube-dl',
- 'exe': 'youtube-dl.exe',
- 'tar': 'youtube-dl-%s.tar.gz' % version}
+ 'bin': 'youtube-dlc',
+ 'exe': 'youtube-dlc.exe',
+ 'tar': 'youtube-dlc-%s.tar.gz' % version}
build_dir = os.path.join('..', '..', 'build', version)
for key, filename in filenames.items():
url = 'https://yt-dl.org/downloads/%s/%s' % (version, filename)
diff --git a/devscripts/gh-pages/update-feed.py b/devscripts/gh-pages/update-feed.py
index 506a62377..b07f1e830 100755
--- a/devscripts/gh-pages/update-feed.py
+++ b/devscripts/gh-pages/update-feed.py
@@ -11,24 +11,24 @@ atom_template = textwrap.dedent("""\
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="http://ytdl-org.github.io/youtube-dl/update/releases.atom" />
- <title>youtube-dl releases</title>
- <id>https://yt-dl.org/feed/youtube-dl-updates-feed</id>
+ <title>youtube-dlc releases</title>
+ <id>https://yt-dl.org/feed/youtube-dlc-updates-feed</id>
<updated>@TIMESTAMP@</updated>
@ENTRIES@
</feed>""")
entry_template = textwrap.dedent("""
<entry>
- <id>https://yt-dl.org/feed/youtube-dl-updates-feed/youtube-dl-@VERSION@</id>
+ <id>https://yt-dl.org/feed/youtube-dlc-updates-feed/youtube-dlc-@VERSION@</id>
<title>New version @VERSION@</title>
- <link href="http://ytdl-org.github.io/youtube-dl" />
+ <link href="http://ytdl-org.github.io/youtube-dlc" />
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
Downloads available at <a href="https://yt-dl.org/downloads/@VERSION@/">https://yt-dl.org/downloads/@VERSION@/</a>
</div>
</content>
<author>
- <name>The youtube-dl maintainers</name>
+ <name>The youtube-dlc maintainers</name>
</author>
<updated>@TIMESTAMP@</updated>
</entry>
diff --git a/devscripts/gh-pages/update-sites.py b/devscripts/gh-pages/update-sites.py
index 531c93c70..38acb5d9a 100755
--- a/devscripts/gh-pages/update-sites.py
+++ b/devscripts/gh-pages/update-sites.py
@@ -5,10 +5,10 @@ import sys
import os
import textwrap
-# We must be able to import youtube_dl
+# We must be able to import youtube_dlc
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
-import youtube_dl
+import youtube_dlc
def main():
@@ -16,7 +16,7 @@ def main():
template = tmplf.read()
ie_htmls = []
- for ie in youtube_dl.list_extractors(age_limit=None):
+ for ie in youtube_dlc.list_extractors(age_limit=None):
ie_html = '<b>{}</b>'.format(ie.IE_NAME)
ie_desc = getattr(ie, 'IE_DESC', None)
if ie_desc is False:
diff --git a/devscripts/make_contributing.py b/devscripts/make_contributing.py
index 226d1a5d6..80426fb0a 100755
--- a/devscripts/make_contributing.py
+++ b/devscripts/make_contributing.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python
from __future__ import unicode_literals
-import io
+# import io
import optparse
-import re
+# import re
def main():
@@ -12,22 +12,22 @@ def main():
if len(args) != 2:
parser.error('Expected an input and an output filename')
- infile, outfile = args
+
+""" infile, outfile = args
with io.open(infile, encoding='utf-8') as inf:
readme = inf.read()
- bug_text = re.search(
- r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
- dev_text = re.search(
- r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING YOUTUBE-DL',
- readme).group(1)
+ bug_text = re.search( """
+# r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
+# dev_text = re.search(
+# r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING youtube-dlc',
+""" readme).group(1)
out = bug_text + dev_text
with io.open(outfile, 'w', encoding='utf-8') as outf:
- outf.write(out)
-
+ outf.write(out) """
if __name__ == '__main__':
main()
diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py
index b7ad23d83..37cb0d4ee 100644
--- a/devscripts/make_issue_template.py
+++ b/devscripts/make_issue_template.py
@@ -16,9 +16,9 @@ def main():
with io.open(infile, encoding='utf-8') as inf:
issue_template_tmpl = inf.read()
- # Get the version from youtube_dl/version.py without importing the package
- exec(compile(open('youtube_dl/version.py').read(),
- 'youtube_dl/version.py', 'exec'))
+ # Get the version from youtube_dlc/version.py without importing the package
+ exec(compile(open('youtube_dlc/version.py').read(),
+ 'youtube_dlc/version.py', 'exec'))
out = issue_template_tmpl % {'version': locals()['__version__']}
diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py
index 0a1762dbc..e6de72b33 100644
--- a/devscripts/make_lazy_extractors.py
+++ b/devscripts/make_lazy_extractors.py
@@ -14,8 +14,8 @@ lazy_extractors_filename = sys.argv[1]
if os.path.exists(lazy_extractors_filename):
os.remove(lazy_extractors_filename)
-from youtube_dl.extractor import _ALL_CLASSES
-from youtube_dl.extractor.common import InfoExtractor, SearchInfoExtractor
+from youtube_dlc.extractor import _ALL_CLASSES
+from youtube_dlc.extractor.common import InfoExtractor, SearchInfoExtractor
with open('devscripts/lazy_load_template.py', 'rt') as f:
module_template = f.read()
diff --git a/devscripts/make_readme.py b/devscripts/make_readme.py
index 8fbce0796..73f203582 100755
--- a/devscripts/make_readme.py
+++ b/devscripts/make_readme.py
@@ -14,7 +14,7 @@ with io.open(README_FILE, encoding='utf-8') as f:
oldreadme = f.read()
header = oldreadme[:oldreadme.index('# OPTIONS')]
-footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
+# footer = oldreadme[oldreadme.index('# CONFIGURATION'):]
options = helptext[helptext.index(' General Options:') + 19:]
options = re.sub(r'(?m)^ (\w.+)$', r'## \1', options)
@@ -23,4 +23,4 @@ options = '# OPTIONS\n' + options + '\n'
with io.open(README_FILE, 'w', encoding='utf-8') as f:
f.write(header)
f.write(options)
- f.write(footer)
+ # f.write(footer)
diff --git a/devscripts/make_supportedsites.py b/devscripts/make_supportedsites.py
index 764795bc5..0ae6f8aa3 100644
--- a/devscripts/make_supportedsites.py
+++ b/devscripts/make_supportedsites.py
@@ -7,10 +7,10 @@ import os
import sys
-# Import youtube_dl
+# Import youtube_dlc
ROOT_DIR = os.path.join(os.path.dirname(__file__), '..')
sys.path.insert(0, ROOT_DIR)
-import youtube_dl
+import youtube_dlc
def main():
@@ -33,7 +33,7 @@ def main():
ie_md += ' (Currently broken)'
yield ie_md
- ies = sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME.lower())
+ ies = sorted(youtube_dlc.gen_extractors(), key=lambda i: i.IE_NAME.lower())
out = '# Supported sites\n' + ''.join(
' - ' + md + '\n'
for md in gen_ies_md(ies))
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py
index 76bf873e1..843ade482 100644
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -8,7 +8,7 @@ import re
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')
-PREFIX = r'''%YOUTUBE-DL(1)
+PREFIX = r'''%youtube-dlc(1)
# NAME
@@ -16,7 +16,7 @@ youtube\-dl \- download videos from youtube.com or other video platforms
# SYNOPSIS
-**youtube-dl** \[OPTIONS\] URL [URL...]
+**youtube-dlc** \[OPTIONS\] URL [URL...]
'''
@@ -33,7 +33,7 @@ def main():
readme = f.read()
readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
- readme = re.sub(r'\s+youtube-dl \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
+ readme = re.sub(r'\s+youtube-dlc \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
readme = PREFIX + readme
readme = filter_options(readme)
diff --git a/devscripts/release.sh b/devscripts/release.sh
index f2411c927..04cb7fec1 100755
--- a/devscripts/release.sh
+++ b/devscripts/release.sh
@@ -53,8 +53,8 @@ fi
if [ ! -z "`git tag | grep "$version"`" ]; then echo 'ERROR: version already present'; exit 1; fi
if [ ! -z "`git status --porcelain | grep -v CHANGELOG`" ]; then echo 'ERROR: the working directory is not clean; commit or stash changes'; exit 1; fi
-useless_files=$(find youtube_dl -type f -not -name '*.py')
-if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dl: $useless_files"; exit 1; fi
+useless_files=$(find youtube_dlc -type f -not -name '*.py')
+if [ ! -z "$useless_files" ]; then echo "ERROR: Non-.py files in youtube_dlc: $useless_files"; exit 1; fi
if [ ! -f "updates_key.pem" ]; then echo 'ERROR: updates_key.pem missing'; exit 1; fi
if ! type pandoc >/dev/null 2>/dev/null; then echo 'ERROR: pandoc is missing'; exit 1; fi
if ! python3 -c 'import rsa' 2>/dev/null; then echo 'ERROR: python3-rsa is missing'; exit 1; fi
@@ -68,18 +68,18 @@ make clean
if $skip_tests ; then
echo 'SKIPPING TESTS'
else
- nosetests --verbose --with-coverage --cover-package=youtube_dl --cover-html test --stop || exit 1
+ nosetests --verbose --with-coverage --cover-package=youtube_dlc --cover-html test --stop || exit 1
fi
/bin/echo -e "\n### Changing version in version.py..."
-sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dl/version.py
+sed -i "s/__version__ = '.*'/__version__ = '$version'/" youtube_dlc/version.py
/bin/echo -e "\n### Changing version in ChangeLog..."
sed -i "s/<unreleased>/$version/" ChangeLog
-/bin/echo -e "\n### Committing documentation, templates and youtube_dl/version.py..."
+/bin/echo -e "\n### Committing documentation, templates and youtube_dlc/version.py..."
make README.md CONTRIBUTING.md issuetemplates supportedsites
-git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md youtube_dl/version.py ChangeLog
+git add README.md CONTRIBUTING.md .github/ISSUE_TEMPLATE/1_broken_site.md .github/ISSUE_TEMPLATE/2_site_support_request.md .github/ISSUE_TEMPLATE/3_site_feature_request.md .github/ISSUE_TEMPLATE/4_bug_report.md .github/ISSUE_TEMPLATE/5_feature_request.md .github/ISSUE_TEMPLATE/6_question.md docs/supportedsites.md youtube_dlc/version.py ChangeLog
git commit $gpg_sign_commits -m "release $version"
/bin/echo -e "\n### Now tagging, signing and pushing..."
@@ -94,13 +94,13 @@ git push origin "$version"
/bin/echo -e "\n### OK, now it is time to build the binaries..."
REV=$(git rev-parse HEAD)
-make youtube-dl youtube-dl.tar.gz
+make youtube-dlc youtube-dlc.tar.gz
read -p "VM running? (y/n) " -n 1
-wget "http://$buildserver/build/ytdl-org/youtube-dl/youtube-dl.exe?rev=$REV" -O youtube-dl.exe
+wget "http://$buildserver/build/ytdl-org/youtube-dl/youtube-dlc.exe?rev=$REV" -O youtube-dlc.exe
mkdir -p "build/$version"
-mv youtube-dl youtube-dl.exe "build/$version"
-mv youtube-dl.tar.gz "build/$version/youtube-dl-$version.tar.gz"
-RELEASE_FILES="youtube-dl youtube-dl.exe youtube-dl-$version.tar.gz"
+mv youtube-dlc youtube-dlc.exe "build/$version"
+mv youtube-dlc.tar.gz "build/$version/youtube-dlc-$version.tar.gz"
+RELEASE_FILES="youtube-dlc youtube-dlc.exe youtube-dlc-$version.tar.gz"
(cd build/$version/ && md5sum $RELEASE_FILES > MD5SUMS)
(cd build/$version/ && sha1sum $RELEASE_FILES > SHA1SUMS)
(cd build/$version/ && sha256sum $RELEASE_FILES > SHA2-256SUMS)
diff --git a/devscripts/show-downloads-statistics.py b/devscripts/show-downloads-statistics.py
index 6c8d1cc2d..ef90a56ab 100644
--- a/devscripts/show-downloads-statistics.py
+++ b/devscripts/show-downloads-statistics.py
@@ -9,11 +9,11 @@ import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.compat import (
+from youtube_dlc.compat import (
compat_print,
compat_urllib_request,
)
-from youtube_dl.utils import format_bytes
+from youtube_dlc.utils import format_bytes
def format_size(bytes):
@@ -36,9 +36,9 @@ for page in itertools.count(1):
asset_name = asset['name']
total_bytes += asset['download_count'] * asset['size']
if all(not re.match(p, asset_name) for p in (
- r'^youtube-dl$',
- r'^youtube-dl-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
- r'^youtube-dl\.exe$')):
+ r'^youtube-dlc$',
+ r'^youtube-dlc-\d{4}\.\d{2}\.\d{2}(?:\.\d+)?\.tar\.gz$',
+ r'^youtube-dlc\.exe$')):
continue
compat_print(
' %s size: %s downloads: %d'
diff --git a/devscripts/zsh-completion.in b/devscripts/zsh-completion.in
index b394a1ae7..bb021862f 100644
--- a/devscripts/zsh-completion.in
+++ b/devscripts/zsh-completion.in
@@ -1,6 +1,6 @@
-#compdef youtube-dl
+#compdef youtube-dlc
-__youtube_dl() {
+__youtube_dlc() {
local curcontext="$curcontext" fileopts diropts cur prev
typeset -A opt_args
fileopts="{{fileopts}}"
@@ -25,4 +25,4 @@ __youtube_dl() {
esac
}
-__youtube_dl
\ No newline at end of file
+__youtube_dlc
\ No newline at end of file
diff --git a/devscripts/zsh-completion.py b/devscripts/zsh-completion.py
index 60aaf76cc..8b957144f 100755
--- a/devscripts/zsh-completion.py
+++ b/devscripts/zsh-completion.py
@@ -6,9 +6,9 @@ from os.path import dirname as dirn
import sys
sys.path.insert(0, dirn(dirn((os.path.abspath(__file__)))))
-import youtube_dl
+import youtube_dlc
-ZSH_COMPLETION_FILE = "youtube-dl.zsh"
+ZSH_COMPLETION_FILE = "youtube-dlc.zsh"
ZSH_COMPLETION_TEMPLATE = "devscripts/zsh-completion.in"
@@ -45,5 +45,5 @@ def build_completion(opt_parser):
f.write(template)
-parser = youtube_dl.parseOpts()[0]
+parser = youtube_dlc.parseOpts()[0]
build_completion(parser)
diff --git a/docs/Makefile b/docs/Makefile
index 712218045..a7159ff45 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -85,17 +85,17 @@ qthelp:
@echo
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
- @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/youtube-dl.qhcp"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/youtube-dlc.qhcp"
@echo "To view the help file:"
- @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/youtube-dl.qhc"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/youtube-dlc.qhc"
devhelp:
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
@echo
@echo "Build finished."
@echo "To view the help file:"
- @echo "# mkdir -p $$HOME/.local/share/devhelp/youtube-dl"
- @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/youtube-dl"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/youtube-dlc"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/youtube-dlc"
@echo "# devhelp"
epub:
diff --git a/docs/conf.py b/docs/conf.py
index 0aaf1b8fc..fa616ebbb 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,6 +1,6 @@
# coding: utf-8
#
-# youtube-dl documentation build configuration file, created by
+# youtube-dlc documentation build configuration file, created by
# sphinx-quickstart on Fri Mar 14 21:05:43 2014.
#
# This file is execfile()d with the current directory set to its
@@ -14,7 +14,7 @@
import sys
import os
-# Allows to import youtube_dl
+# Allows to import youtube_dlc
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# -- General configuration ------------------------------------------------
@@ -36,7 +36,7 @@ source_suffix = '.rst'
master_doc = 'index'
# General information about the project.
-project = u'youtube-dl'
+project = u'youtube-dlc'
copyright = u'2014, Ricardo Garcia Gonzalez'
# The version info for the project you're documenting, acts as replacement for
@@ -44,7 +44,7 @@ copyright = u'2014, Ricardo Garcia Gonzalez'
# built documents.
#
# The short X.Y version.
-from youtube_dl.version import __version__
+from youtube_dlc.version import __version__
version = __version__
# The full version, including alpha/beta/rc tags.
release = version
@@ -68,4 +68,4 @@ html_theme = 'default'
html_static_path = ['_static']
# Output file base name for HTML help builder.
-htmlhelp_basename = 'youtube-dldoc'
+htmlhelp_basename = 'youtube-dlcdoc'
diff --git a/docs/index.rst b/docs/index.rst
index b746ff95b..afa26fef1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,13 +1,13 @@
-Welcome to youtube-dl's documentation!
+Welcome to youtube-dlc's documentation!
======================================
-*youtube-dl* is a command-line program to download videos from YouTube.com and more sites.
+*youtube-dlc* is a command-line program to download videos from YouTube.com and more sites.
It can also be used in Python code.
Developer guide
---------------
-This section contains information for using *youtube-dl* from Python programs.
+This section contains information for using *youtube-dlc* from Python programs.
.. toctree::
:maxdepth: 2
diff --git a/docs/module_guide.rst b/docs/module_guide.rst
index 03d72882e..6413659cf 100644
--- a/docs/module_guide.rst
+++ b/docs/module_guide.rst
@@ -1,11 +1,11 @@
-Using the ``youtube_dl`` module
+Using the ``youtube_dlc`` module
===============================
-When using the ``youtube_dl`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors:
+When using the ``youtube_dlc`` module, you start by creating an instance of :class:`YoutubeDL` and adding all the available extractors:
.. code-block:: python
- >>> from youtube_dl import YoutubeDL
+ >>> from youtube_dlc import YoutubeDL
>>> ydl = YoutubeDL()
>>> ydl.add_default_info_extractors()
@@ -22,7 +22,7 @@ You use the :meth:`YoutubeDL.extract_info` method for getting the video informat
[youtube] BaW_jenozKc: Downloading video info webpage
[youtube] BaW_jenozKc: Extracting video information
>>> info['title']
- 'youtube-dl test video "\'/\\ä↭𝕐'
+ 'youtube-dlc test video "\'/\\ä↭𝕐'
>>> info['height'], info['width']
(720, 1280)
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 35275278b..32c452267 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -6,7 +6,6 @@
- **23video**
- **24video**
- **3qsdn**: 3Q SDN
- - **3sat**
- **4tube**
- **56.com**
- **5min**
@@ -26,13 +25,13 @@
- **AcademicEarth:Course**
- **acast**
- **acast:channel**
- - **AddAnime**
- **ADN**: Anime Digital Network
- **AdobeConnect**
- - **AdobeTV**
- - **AdobeTVChannel**
- - **AdobeTVShow**
- - **AdobeTVVideo**
+ - **adobetv**
+ - **adobetv:channel**
+ - **adobetv:embed**
+ - **adobetv:show**
+ - **adobetv:video**
- **AdultSwim**
- **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
- **afreecatv**: afreecatv.com
@@ -76,8 +75,6 @@
- **awaan:video**
- **AZMedien**: AZ Medien videos
- **BaiduVideo**: 百度视频
- - **bambuser**
- - **bambuser:channel**
- **Bandcamp**
- **Bandcamp:album**
- **Bandcamp:weekly**
@@ -100,6 +97,7 @@
- **BiliBili**
- **BilibiliAudio**
- **BilibiliAudioAlbum**
+ - **BiliBiliPlayer**
- **BioBioChileTV**
- **BIQLE**
- **BitChute**
@@ -177,12 +175,12 @@
- **CNN**
- **CNNArticle**
- **CNNBlogs**
- - **ComCarCoff**
- **ComedyCentral**
- **ComedyCentralFullEpisodes**
- **ComedyCentralShortname**
- **ComedyCentralTV**
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
+ - **CONtv**
- **Corus**
- **Coub**
- **Cracked**
@@ -204,8 +202,6 @@
- **dailymotion**
- **dailymotion:playlist**
- **dailymotion:user**
- - **DaisukiMotto**
- - **DaisukiMottoPlaylist**
- **daum.net**
- **daum.net:clip**
- **daum.net:playlist**
@@ -227,11 +223,11 @@
- **Disney**
- **dlive:stream**
- **dlive:vod**
+ - **DoodStream**
- **Dotsub**
- **DouyuShow**
- **DouyuTV**: 斗鱼
- **DPlay**
- - **DPlayIt**
- **DRBonanza**
- **Dropbox**
- **DrTuber**
@@ -284,12 +280,12 @@
- **FiveThirtyEight**
- **FiveTV**
- **Flickr**
- - **Flipagram**
- **Folketinget**: Folketinget (ft.dk; Danish parliament)
- **FootyRoom**
- **Formula1**
- **FOX**
- **FOX9**
+ - **FOX9News**
- **Foxgay**
- **foxnews**: Fox News and Fox Business Video
- **foxnews:article**
@@ -315,8 +311,6 @@
- **FXNetworks**
- **Gaia**
- **GameInformer**
- - **GameOne**
- - **gameone:playlist**
- **GameSpot**
- **GameStar**
- **Gaskrank**
@@ -331,14 +325,12 @@
- **Globo**
- **GloboArticle**
- **Go**
- - **Go90**
- **GodTube**
- **Golem**
- **GoogleDrive**
- **Goshgay**
- **GPUTechConf**
- **Groupon**
- - **Hark**
- **hbo**
- **HearThisAt**
- **Heise**
@@ -360,6 +352,7 @@
- **hotstar:playlist**
- **Howcast**
- **HowStuffWorks**
+ - **hrfernsehen**
- **HRTi**
- **HRTiPlaylist**
- **Huajiao**: 花椒直播
@@ -367,7 +360,6 @@
- **Hungama**
- **HungamaSong**
- **Hypem**
- - **Iconosquare**
- **ign.com**
- **imdb**: Internet Movie Database trailers
- **imdb:list**: Internet Movie Database lists
@@ -399,7 +391,6 @@
- **JeuxVideo**
- **Joj**
- **Jove**
- - **jpopsuki.tv**
- **JWPlatform**
- **Kakao**
- **Kaltura**
@@ -407,14 +398,14 @@
- **Kankan**
- **Karaoketv**
- **KarriereVideos**
- - **keek**
+ - **Katsomo**
- **KeezMovies**
- **Ketnet**
- **KhanAcademy**
- **KickStarter**
+ - **KinjaEmbed**
- **KinoPoisk**
- **KonserthusetPlay**
- - **kontrtube**: KontrTube.ru - Труба зовёт
- **KrasView**: Красвью
- **Ku6**
- **KUSI**
@@ -431,7 +422,6 @@
- **Lcp**
- **LcpPlay**
- **Le**: 乐视网
- - **Learnr**
- **Lecture2Go**
- **Lecturio**
- **LecturioCourse**
@@ -465,11 +455,9 @@
- **lynda**: lynda.com videos
- **lynda:course**: lynda.com online courses
- **m6**
- - **macgamestore**: MacGameStore trailers
- **mailru**: Видео@Mail.Ru
- **mailru:music**: Музыка@Mail.Ru
- **mailru:music:search**: Музыка@Mail.Ru
- - **MakerTV**
- **MallTV**
- **mangomolo:live**
- **mangomolo:video**
@@ -496,14 +484,12 @@
- **Mgoon**
- **MGTV**: 芒果TV
- **MiaoPai**
- - **Minhateca**
- **MinistryGrid**
- **Minoto**
- **miomio.tv**
- **MiTele**: mitele.es
- **mixcloud**
- **mixcloud:playlist**
- - **mixcloud:stream**
- **mixcloud:user**
- **Mixer:live**
- **Mixer:vod**
@@ -512,6 +498,7 @@
- **MNetTV**
- **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net
- **Mofosex**
+ - **MofosexEmbed**
- **Mojvideo**
- **Morningstar**: morningstar.com
- **Motherless**
@@ -525,11 +512,10 @@
- **mtg**: MTG services
- **mtv**
- **mtv.de**
- - **mtv81**
- **mtv:video**
+ - **mtvjapan**
- **mtvservices:embedded**
- **MuenchenTV**: münchen.tv
- - **MusicPlayOn**
- **mva**: Microsoft Virtual Academy videos
- **mva:course**: Microsoft Virtual Academy courses
- **Mwave**
@@ -634,18 +620,26 @@
- **OnionStudios**
- **Ooyala**
- **OoyalaExternal**
- - **Openload**
- **OraTV**
+ - **orf:burgenland**: Radio Burgenland
- **orf:fm4**: radio FM4
- **orf:fm4:story**: fm4.orf.at stories
- **orf:iptv**: iptv.ORF.at
+ - **orf:kaernten**: Radio Kärnten
+ - **orf:noe**: Radio Niederösterreich
+ - **orf:oberoesterreich**: Radio Oberösterreich
- **orf:oe1**: Radio Österreich 1
+ - **orf:oe3**: Radio Österreich 3
+ - **orf:salzburg**: Radio Salzburg
+ - **orf:steiermark**: Radio Steiermark
+ - **orf:tirol**: Radio Tirol
- **orf:tvthek**: ORF TVthek
+ - **orf:vorarlberg**: Radio Vorarlberg
+ - **orf:wien**: Radio Wien
- **OsnatelTV**
- **OutsideTV**
- **PacktPub**
- **PacktPubCourse**
- - **PandaTV**: 熊猫TV
- **pandora.tv**: 판도라TV
- **ParamountNetwork**
- **parliamentlive.tv**: UK parliament videos
@@ -679,8 +673,10 @@
- **plus.google**: Google Plus
- **podomatic**
- **Pokemon**
+ - **PokemonWatch**
- **PolskieRadio**
- **PolskieRadioCategory**
+ - **Popcorntimes**
- **PopcornTV**
- **PornCom**
- **PornerBros**
@@ -694,7 +690,6 @@
- **PornoXO**
- **PornTube**
- **PressTV**
- - **PromptFile**
- **prosiebensat1**: ProSiebenSat.1 Digital
- **puhutv**
- **puhutv:serie**
@@ -735,8 +730,6 @@
- **Restudy**
- **Reuters**
- **ReverbNation**
- - **revision**
- - **revision3:embed**
- **RICE**
- **RMCDecouverte**
- **RockstarGames**
@@ -781,11 +774,13 @@
- **screen.yahoo:search**: Yahoo screen search
- **Screencast**
- **ScreencastOMatic**
+ - **ScrippsNetworks**
- **scrippsnetworks:watch**
+ - **SCTE**
+ - **SCTECourse**
- **Seeker**
- **SenateISVP**
- **SendtoNews**
- - **ServingSys**
- **Servus**
- **Sexu**
- **SeznamZpravy**
@@ -816,6 +811,7 @@
- **soundcloud:set**
- **soundcloud:trackstation**
- **soundcloud:user**
+ - **SoundcloudEmbed**
- **soundgasm**
- **soundgasm:profile**
- **southpark.cc.com**
@@ -841,8 +837,10 @@
- **stanfordoc**: Stanford Open ClassRoom
- **Steam**
- **Stitcher**
+ - **StoryFire**
+ - **StoryFireSeries**
+ - **StoryFireUser**
- **Streamable**
- - **Streamango**
- **streamcloud.eu**
- **StreamCZ**
- **StreetVoice**
@@ -884,9 +882,11 @@
- **TeleQuebec**
- **TeleQuebecEmission**
- **TeleQuebecLive**
+ - **TeleQuebecSquat**
- **TeleTask**
- **Telewebion**
- **TennisTV**
+ - **TenPlay**
- **TF1**
- **TFO**
- **TheIntercept**
@@ -925,11 +925,12 @@
- **tunein:topic**
- **TunePk**
- **Turbo**
- - **Tutv**
- **tv.dfb.de**
- **TV2**
- **tv2.hu**
- **TV2Article**
+ - **TV2DK**
+ - **TV2DKBornholmPlay**
- **TV4**: tv4.se and tv4play.se
- **TV5MondePlus**: TV5MONDE+
- **TVA**
@@ -966,10 +967,12 @@
- **twitch:vod**
- **twitter**
- **twitter:amplify**
+ - **twitter:broadcast**
- **twitter:card**
- **udemy**
- **udemy:course**
- **UDNEmbed**: 聯合影音
+ - **UFCArabia**
- **UFCTV**
- **UKTVPlay**
- **umg:de**: Universal Music Deutschland
@@ -990,8 +993,6 @@
- **Vbox7**
- **VeeHD**
- **Veoh**
- - **verystream**
- - **Vessel**
- **Vesti**: Вести.Ru
- **Vevo**
- **VevoPlaylist**
@@ -1006,13 +1007,11 @@
- **Viddler**
- **Videa**
- **video.google:search**: Google Video search
- - **video.mit.edu**
- **VideoDetective**
- **videofy.me**
- **videomore**
- **videomore:season**
- **videomore:video**
- - **VideoPremium**
- **VideoPress**
- **Vidio**
- **VidLii**
@@ -1022,9 +1021,8 @@
- **Vidzi**
- **vier**: vier.be and vijf.be
- **vier:videos**
- - **ViewLift**
- - **ViewLiftEmbed**
- - **Viewster**
+ - **viewlift**
+ - **viewlift:embed**
- **Viidea**
- **viki**
- **viki:channel**
@@ -1090,7 +1088,6 @@
- **Weibo**
- **WeiboMobile**
- **WeiqiTV**: WQTV
- - **Wimp**
- **Wistia**
- **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
- **WorldStarHipHop**
@@ -1099,7 +1096,7 @@
- **WWE**
- **XBef**
- **XboxClips**
- - **XFileShare**: XFileShare based sites: DaClips, FileHoot, GorillaVid, MovPod, PowerWatch, Rapidvideo.ws, TheVideoBee, Vidto, Streamin.To, XVIDSTAGE, Vid ABC, VidBom, vidlo, RapidVideo.TV, FastVideo.me
+ - **XFileShare**: XFileShare based sites: ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, XVideoSharing
- **XHamster**
- **XHamsterEmbed**
- **XHamsterUser**
@@ -1158,7 +1155,7 @@
- **Zaq1**
- **Zattoo**
- **ZattooLive**
- - **ZDF**
+ - **ZDF-3sat**
- **ZDFChannel**
- **zingmp3**: mp3.zing.vn
- **Zype**
diff --git a/make_win.bat b/make_win.bat
new file mode 100644
index 000000000..c5caac08f
--- /dev/null
+++ b/make_win.bat
@@ -0,0 +1 @@
+pyinstaller.exe youtube_dlc\__main__.py --onefile --name youtube-dlc
\ No newline at end of file
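
The new make_win.bat drives PyInstaller's one-file mode to produce a single youtube-dlc.exe. For reference, a minimal sketch of the same build invoked programmatically (assumes PyInstaller is installed via `pip install pyinstaller`; the entry-point path mirrors the batch script):

    # Sketch: programmatic equivalent of make_win.bat's PyInstaller call.
    # --onefile bundles the interpreter, stdlib and package into one
    # self-contained youtube-dlc executable.
    import PyInstaller.__main__

    PyInstaller.__main__.run([
        'youtube_dlc/__main__.py',
        '--onefile',
        '--name', 'youtube-dlc',
    ])
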
diff --git a/setup.cfg b/setup.cfg
index da78a9c47..f658aaa0a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,5 +2,5 @@
universal = True
[flake8]
-exclude = youtube_dl/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
+exclude = youtube_dlc/extractor/__init__.py,devscripts/buildserver.py,devscripts/lazy_load_template.py,devscripts/make_issue_template.py,setup.py,build,.git,venv
ignore = E402,E501,E731,E741,W503
diff --git a/setup.py b/setup.py
index af68b485e..f5f0bae62 100644
--- a/setup.py
+++ b/setup.py
@@ -1,68 +1,27 @@
#!/usr/bin/env python
# coding: utf-8
-from __future__ import print_function
-
+from setuptools import setup, Command, find_packages
import os.path
import warnings
import sys
-
-try:
- from setuptools import setup, Command
- setuptools_available = True
-except ImportError:
- from distutils.core import setup, Command
- setuptools_available = False
from distutils.spawn import spawn
-try:
- # This will create an exe that needs Microsoft Visual C++ 2008
- # Redistributable Package
- import py2exe
-except ImportError:
- if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
- print('Cannot import py2exe', file=sys.stderr)
- exit(1)
-
-py2exe_options = {
- 'bundle_files': 1,
- 'compressed': 1,
- 'optimize': 2,
- 'dist_dir': '.',
- 'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
-}
-
-# Get the version from youtube_dl/version.py without importing the package
-exec(compile(open('youtube_dl/version.py').read(),
- 'youtube_dl/version.py', 'exec'))
-
-DESCRIPTION = 'YouTube video downloader'
-LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites'
+# Get the version from youtube_dlc/version.py without importing the package
+exec(compile(open('youtube_dlc/version.py').read(),
+ 'youtube_dlc/version.py', 'exec'))
-py2exe_console = [{
- 'script': './youtube_dl/__main__.py',
- 'dest_base': 'youtube-dl',
- 'version': __version__,
- 'description': DESCRIPTION,
- 'comments': LONG_DESCRIPTION,
- 'product_name': 'youtube-dl',
- 'product_version': __version__,
-}]
-
-py2exe_params = {
- 'console': py2exe_console,
- 'options': {'py2exe': py2exe_options},
- 'zipfile': None
-}
+DESCRIPTION = 'Media downloader supporting various sites such as youtube'
+LONG_DESCRIPTION = 'Command-line program to download videos from YouTube.com and other video sites. Based on a more active community fork.'
if len(sys.argv) >= 2 and sys.argv[1] == 'py2exe':
- params = py2exe_params
+ print("inv")
else:
files_spec = [
- ('etc/bash_completion.d', ['youtube-dl.bash-completion']),
- ('etc/fish/completions', ['youtube-dl.fish']),
- ('share/doc/youtube_dl', ['README.txt']),
- ('share/man/man1', ['youtube-dl.1'])
+ ('etc/bash_completion.d', ['youtube-dlc.bash-completion']),
+ ('etc/fish/completions', ['youtube-dlc.fish']),
+ ('share/doc/youtube_dlc', ['README.txt']),
+ ('share/man/man1', ['youtube-dlc.1'])
]
root = os.path.dirname(os.path.abspath(__file__))
data_files = []
@@ -78,10 +37,10 @@ else:
params = {
'data_files': data_files,
}
- if setuptools_available:
- params['entry_points'] = {'console_scripts': ['youtube-dl = youtube_dl:main']}
- else:
- params['scripts'] = ['bin/youtube-dl']
+ #if setuptools_available:
+ params['entry_points'] = {'console_scripts': ['youtube-dlc = youtube_dlc:main']}
+ #else:
+ # params['scripts'] = ['bin/youtube-dlc']
class build_lazy_extractors(Command):
description = 'Build the extractor lazy loading module'
@@ -95,54 +54,50 @@ class build_lazy_extractors(Command):
def run(self):
spawn(
- [sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dl/extractor/lazy_extractors.py'],
+ [sys.executable, 'devscripts/make_lazy_extractors.py', 'youtube_dlc/extractor/lazy_extractors.py'],
dry_run=self.dry_run,
)
setup(
- name='youtube_dl',
+ name="youtube_dlc",
version=__version__,
+ maintainer="Tom-Oliver Heidel",
+ maintainer_email="theidel@uni-bremen.de",
description=DESCRIPTION,
long_description=LONG_DESCRIPTION,
- url='https://github.com/ytdl-org/youtube-dl',
- author='Ricardo Garcia',
- author_email='ytdl@yt-dl.org',
- maintainer='Sergey M.',
- maintainer_email='dstftw@gmail.com',
- license='Unlicense',
- packages=[
- 'youtube_dl',
- 'youtube_dl.extractor', 'youtube_dl.downloader',
- 'youtube_dl.postprocessor'],
-
- # Provokes warning on most systems (why?!)
- # test_suite = 'nose.collector',
- # test_requires = ['nosetest'],
-
+ # long_description_content_type="text/markdown",
+ url="https://github.com/blackjack4494/youtube-dlc",
+ packages=find_packages(exclude=("youtube_dl",)),
+ #packages=[
+ # 'youtube_dlc',
+ # 'youtube_dlc.extractor', 'youtube_dlc.downloader',
+ # 'youtube_dlc.postprocessor'],
classifiers=[
- 'Topic :: Multimedia :: Video',
- 'Development Status :: 5 - Production/Stable',
- 'Environment :: Console',
- 'License :: Public Domain',
- 'Programming Language :: Python',
- 'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.6',
- 'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.2',
- 'Programming Language :: Python :: 3.3',
- 'Programming Language :: Python :: 3.4',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6',
- 'Programming Language :: Python :: 3.7',
- 'Programming Language :: Python :: 3.8',
- 'Programming Language :: Python :: Implementation',
- 'Programming Language :: Python :: Implementation :: CPython',
- 'Programming Language :: Python :: Implementation :: IronPython',
- 'Programming Language :: Python :: Implementation :: Jython',
- 'Programming Language :: Python :: Implementation :: PyPy',
+ "Topic :: Multimedia :: Video",
+ "Development Status :: 5 - Production/Stable",
+ "Environment :: Console",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 2",
+ "Programming Language :: Python :: 2.6",
+ "Programming Language :: Python :: 2.7",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.2",
+ "Programming Language :: Python :: 3.3",
+ "Programming Language :: Python :: 3.4",
+ "Programming Language :: Python :: 3.5",
+ "Programming Language :: Python :: 3.6",
+ "Programming Language :: Python :: 3.7",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: Implementation",
+ "Programming Language :: Python :: Implementation :: CPython",
+ "Programming Language :: Python :: Implementation :: IronPython",
+ "Programming Language :: Python :: Implementation :: Jython",
+ "Programming Language :: Python :: Implementation :: PyPy",
+ "License :: Public Domain",
+ "Operating System :: OS Independent",
],
-
- cmdclass={'build_lazy_extractors': build_lazy_extractors},
+ python_requires='>=2.6',
+
+ cmdclass={'build_lazy_extractors': build_lazy_extractors},
**params
-)
+)
\ No newline at end of file
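
With setuptools now an unconditional import, the console_scripts entry point replaces the old bin/ script shim: installing the package generates a `youtube-dlc` command that calls `youtube_dlc.main()`. Roughly, the generated wrapper looks like this (a sketch of the standard setuptools script template, not code from this repository):

    # Sketch of the wrapper setuptools generates for
    # 'youtube-dlc = youtube_dlc:main'
    import re
    import sys

    from youtube_dlc import main

    if __name__ == '__main__':
        # strip the '-script.py'/'.exe' suffix Windows launchers append
        sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
        sys.exit(main())
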
diff --git a/test/helper.py b/test/helper.py
index e62aab11e..f45818b0f 100644
--- a/test/helper.py
+++ b/test/helper.py
@@ -10,13 +10,13 @@ import types
import ssl
import sys
-import youtube_dl.extractor
-from youtube_dl import YoutubeDL
-from youtube_dl.compat import (
+import youtube_dlc.extractor
+from youtube_dlc import YoutubeDL
+from youtube_dlc.compat import (
compat_os_name,
compat_str,
)
-from youtube_dl.utils import (
+from youtube_dlc.utils import (
preferredencoding,
write_string,
)
@@ -90,7 +90,7 @@ class FakeYDL(YoutubeDL):
def gettestcases(include_onlymatching=False):
- for ie in youtube_dl.extractor.gen_extractors():
+ for ie in youtube_dlc.extractor.gen_extractors():
for tc in ie.get_testcases(include_onlymatching):
yield tc
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index 71f6608fe..bdd01e41a 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -10,10 +10,10 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
-from youtube_dl.compat import compat_etree_fromstring, compat_http_server
-from youtube_dl.extractor.common import InfoExtractor
-from youtube_dl.extractor import YoutubeIE, get_info_extractor
-from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+from youtube_dlc.compat import compat_etree_fromstring, compat_http_server
+from youtube_dlc.extractor.common import InfoExtractor
+from youtube_dlc.extractor import YoutubeIE, get_info_extractor
+from youtube_dlc.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
import threading
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index ce9666171..6d02c2a54 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -12,12 +12,12 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import copy
from test.helper import FakeYDL, assertRegexpMatches
-from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_str, compat_urllib_error
-from youtube_dl.extractor import YoutubeIE
-from youtube_dl.extractor.common import InfoExtractor
-from youtube_dl.postprocessor.common import PostProcessor
-from youtube_dl.utils import ExtractorError, match_filter_func
+from youtube_dlc import YoutubeDL
+from youtube_dlc.compat import compat_str, compat_urllib_error
+from youtube_dlc.extractor import YoutubeIE
+from youtube_dlc.extractor.common import InfoExtractor
+from youtube_dlc.postprocessor.common import PostProcessor
+from youtube_dlc.utils import ExtractorError, match_filter_func
TEST_URL = 'http://localhost/sample.mp4'
@@ -816,11 +816,15 @@ class TestYoutubeDL(unittest.TestCase):
'webpage_url': 'http://example.com',
}
- def get_ids(params):
+ def get_downloaded_info_dicts(params):
ydl = YDL(params)
- # make a copy because the dictionary can be modified
- ydl.process_ie_result(playlist.copy())
- return [int(v['id']) for v in ydl.downloaded_info_dicts]
+ # make a deep copy because the dictionary and nested entries
+ # can be modified
+ ydl.process_ie_result(copy.deepcopy(playlist))
+ return ydl.downloaded_info_dicts
+
+ def get_ids(params):
+ return [int(v['id']) for v in get_downloaded_info_dicts(params)]
result = get_ids({})
self.assertEqual(result, [1, 2, 3, 4])
@@ -852,6 +856,22 @@ class TestYoutubeDL(unittest.TestCase):
result = get_ids({'playlist_items': '2-4,3-4,3'})
self.assertEqual(result, [2, 3, 4])
+ # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591
+ # @{
+ result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
+ self.assertEqual(result[0]['playlist_index'], 2)
+ self.assertEqual(result[1]['playlist_index'], 3)
+
+ result = get_downloaded_info_dicts({'playlist_items': '2-4,3-4,3'})
+ self.assertEqual(result[0]['playlist_index'], 2)
+ self.assertEqual(result[1]['playlist_index'], 3)
+ self.assertEqual(result[2]['playlist_index'], 4)
+
+ result = get_downloaded_info_dicts({'playlist_items': '4,2'})
+ self.assertEqual(result[0]['playlist_index'], 4)
+ self.assertEqual(result[1]['playlist_index'], 2)
+ # @}
+
def test_urlopen_no_file_protocol(self):
# see https://github.com/ytdl-org/youtube-dl/issues/8227
ydl = YDL()
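
The switch from playlist.copy() to copy.deepcopy(playlist) matters because dict.copy() is shallow: the copied dict still shares its nested entry dicts, so fields such as playlist_index set while processing one playlist_items selection would leak into the next run. A minimal demonstration of the difference:

    import copy

    playlist = {'entries': [{'id': '1'}, {'id': '2'}]}

    shallow = dict(playlist)           # same effect as playlist.copy()
    shallow['entries'][0]['id'] = 'x'  # mutates the shared nested dict
    assert playlist['entries'][0]['id'] == 'x'

    pristine = {'entries': [{'id': '1'}, {'id': '2'}]}
    deep = copy.deepcopy(pristine)
    deep['entries'][0]['id'] = 'x'     # nested dicts were duplicated too
    assert pristine['entries'][0]['id'] == '1'
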
diff --git a/test/test_YoutubeDLCookieJar.py b/test/test_YoutubeDLCookieJar.py
index f959798de..615d8a9d8 100644
--- a/test/test_YoutubeDLCookieJar.py
+++ b/test/test_YoutubeDLCookieJar.py
@@ -10,7 +10,7 @@ import tempfile
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.utils import YoutubeDLCookieJar
+from youtube_dlc.utils import YoutubeDLCookieJar
class TestYoutubeDLCookieJar(unittest.TestCase):
@@ -39,6 +39,13 @@ class TestYoutubeDLCookieJar(unittest.TestCase):
assert_cookie_has_value('HTTPONLY_COOKIE')
assert_cookie_has_value('JS_ACCESSIBLE_COOKIE')
+ def test_malformed_cookies(self):
+ cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt')
+ cookiejar.load(ignore_discard=True, ignore_expires=True)
+ # Cookies should be empty since all malformed cookie file entries
+ # will be ignored
+ self.assertFalse(cookiejar._cookies)
+
if __name__ == '__main__':
unittest.main()
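
For contrast, the stock Netscape-format parser in the standard library refuses the whole file when it hits a malformed row, which is the behavior the skip-and-continue handling in YoutubeDLCookieJar works around. A quick check against the same fixture (Python 3 sketch):

    from http.cookiejar import LoadError, MozillaCookieJar

    jar = MozillaCookieJar('./test/testdata/cookies/malformed_cookies.txt')
    try:
        jar.load(ignore_discard=True, ignore_expires=True)
    except LoadError as err:
        # stdlib aborts on the first bad row; YoutubeDLCookieJar instead
        # drops such rows, leaving _cookies empty for this fixture
        print('stdlib parser rejected the file:', err)
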
diff --git a/test/test_aes.py b/test/test_aes.py
index cc89fb6ab..ef1e1b189 100644
--- a/test/test_aes.py
+++ b/test/test_aes.py
@@ -8,8 +8,8 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
-from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes
+from youtube_dlc.aes import aes_decrypt, aes_encrypt, aes_cbc_decrypt, aes_cbc_encrypt, aes_decrypt_text
+from youtube_dlc.utils import bytes_to_intlist, intlist_to_bytes
import base64
# the encrypted data can be generate with 'devscripts/generate_aes_testdata.py'
diff --git a/test/test_age_restriction.py b/test/test_age_restriction.py
index 6f5513faa..b73bdd767 100644
--- a/test/test_age_restriction.py
+++ b/test/test_age_restriction.py
@@ -10,7 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import try_rm
-from youtube_dl import YoutubeDL
+from youtube_dlc import YoutubeDL
def _download_restricted(url, filename, age):
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 465ce0050..7b6664cac 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -12,7 +12,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import gettestcases
-from youtube_dl.extractor import (
+from youtube_dlc.extractor import (
FacebookIE,
gen_extractors,
YoutubeIE,
@@ -70,7 +70,7 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_search_matching(self):
self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
- self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+ self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dlc+test+video&filters=video&lclk=video', ['youtube:search_url'])
def test_youtube_extract(self):
assertExtractId = lambda url, id: self.assertEqual(YoutubeIE.extract_id(url), id)
@@ -123,12 +123,6 @@ class TestAllURLsMatching(unittest.TestCase):
self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs'])
self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs'])
- def test_yahoo_https(self):
- # https://github.com/ytdl-org/youtube-dl/issues/2701
- self.assertMatch(
- 'https://screen.yahoo.com/smartwatches-latest-wearable-gadgets-163745379-cbs.html',
- ['Yahoo'])
-
def test_no_duplicated_ie_names(self):
name_accu = collections.defaultdict(list)
for ie in self.ies:
diff --git a/test/test_cache.py b/test/test_cache.py
index a16160142..1167519d1 100644
--- a/test/test_cache.py
+++ b/test/test_cache.py
@@ -13,7 +13,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
-from youtube_dl.cache import Cache
+from youtube_dlc.cache import Cache
def _is_empty(d):
diff --git a/test/test_compat.py b/test/test_compat.py
index 86ff389fd..8c49a001e 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -10,7 +10,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.compat import (
+from youtube_dlc.compat import (
compat_getenv,
compat_setenv,
compat_etree_Element,
@@ -28,11 +28,11 @@ from youtube_dl.compat import (
class TestCompat(unittest.TestCase):
def test_compat_getenv(self):
test_str = 'тест'
- compat_setenv('YOUTUBE_DL_COMPAT_GETENV', test_str)
- self.assertEqual(compat_getenv('YOUTUBE_DL_COMPAT_GETENV'), test_str)
+ compat_setenv('youtube_dlc_COMPAT_GETENV', test_str)
+ self.assertEqual(compat_getenv('youtube_dlc_COMPAT_GETENV'), test_str)
def test_compat_setenv(self):
- test_var = 'YOUTUBE_DL_COMPAT_SETENV'
+ test_var = 'youtube_dlc_COMPAT_SETENV'
test_str = 'тест'
compat_setenv(test_var, test_str)
compat_getenv(test_var)
@@ -46,11 +46,11 @@ class TestCompat(unittest.TestCase):
compat_setenv('HOME', old_home or '')
def test_all_present(self):
- import youtube_dl.compat
- all_names = youtube_dl.compat.__all__
+ import youtube_dlc.compat
+ all_names = youtube_dlc.compat.__all__
present_names = set(filter(
lambda c: '_' in c and not c.startswith('_'),
- dir(youtube_dl.compat))) - set(['unicode_literals'])
+ dir(youtube_dlc.compat))) - set(['unicode_literals'])
self.assertEqual(all_names, sorted(present_names))
def test_compat_urllib_parse_unquote(self):
diff --git a/test/test_download.py b/test/test_download.py
index ebe820dfc..bcd3b4041 100644
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -24,24 +24,24 @@ import io
import json
import socket
-import youtube_dl.YoutubeDL
-from youtube_dl.compat import (
+import youtube_dlc.YoutubeDL
+from youtube_dlc.compat import (
compat_http_client,
compat_urllib_error,
compat_HTTPError,
)
-from youtube_dl.utils import (
+from youtube_dlc.utils import (
DownloadError,
ExtractorError,
format_bytes,
UnavailableVideoError,
)
-from youtube_dl.extractor import get_info_extractor
+from youtube_dlc.extractor import get_info_extractor
RETRIES = 3
-class YoutubeDL(youtube_dl.YoutubeDL):
+class YoutubeDL(youtube_dlc.YoutubeDL):
def __init__(self, *args, **kwargs):
self.to_stderr = self.to_screen
self.processed_info_dicts = []
@@ -92,7 +92,7 @@ class TestDownload(unittest.TestCase):
def generator(test_case, tname):
def test_template(self):
- ie = youtube_dl.extractor.get_info_extractor(test_case['name'])()
+ ie = youtube_dlc.extractor.get_info_extractor(test_case['name'])()
other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
is_playlist = any(k.startswith('playlist') for k in test_case)
test_cases = test_case.get(
diff --git a/test/test_downloader_http.py b/test/test_downloader_http.py
index 750472281..c8e28bd3a 100644
--- a/test/test_downloader_http.py
+++ b/test/test_downloader_http.py
@@ -10,10 +10,10 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import http_server_port, try_rm
-from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server
-from youtube_dl.downloader.http import HttpFD
-from youtube_dl.utils import encodeFilename
+from youtube_dlc import YoutubeDL
+from youtube_dlc.compat import compat_http_server
+from youtube_dlc.downloader.http import HttpFD
+from youtube_dlc.utils import encodeFilename
import threading
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
diff --git a/test/test_execution.py b/test/test_execution.py
index 11661bb68..b18e63d73 100644
--- a/test/test_execution.py
+++ b/test/test_execution.py
@@ -10,7 +10,7 @@ import os
import subprocess
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.utils import encodeArgument
+from youtube_dlc.utils import encodeArgument
rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -23,18 +23,18 @@ except AttributeError:
class TestExecution(unittest.TestCase):
def test_import(self):
- subprocess.check_call([sys.executable, '-c', 'import youtube_dl'], cwd=rootDir)
+ subprocess.check_call([sys.executable, '-c', 'import youtube_dlc'], cwd=rootDir)
def test_module_exec(self):
if sys.version_info >= (2, 7): # Python 2.6 doesn't support package execution
- subprocess.check_call([sys.executable, '-m', 'youtube_dl', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+ subprocess.check_call([sys.executable, '-m', 'youtube_dlc', '--version'], cwd=rootDir, stdout=_DEV_NULL)
def test_main_exec(self):
- subprocess.check_call([sys.executable, 'youtube_dl/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
+ subprocess.check_call([sys.executable, 'youtube_dlc/__main__.py', '--version'], cwd=rootDir, stdout=_DEV_NULL)
def test_cmdline_umlauts(self):
p = subprocess.Popen(
- [sys.executable, 'youtube_dl/__main__.py', encodeArgument('ä'), '--version'],
+ [sys.executable, 'youtube_dlc/__main__.py', encodeArgument('ä'), '--version'],
cwd=rootDir, stdout=_DEV_NULL, stderr=subprocess.PIPE)
_, stderr = p.communicate()
self.assertFalse(stderr)
diff --git a/test/test_http.py b/test/test_http.py
index 3ee0a5dda..55c3c6183 100644
--- a/test/test_http.py
+++ b/test/test_http.py
@@ -9,8 +9,8 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import http_server_port
-from youtube_dl import YoutubeDL
-from youtube_dl.compat import compat_http_server, compat_urllib_request
+from youtube_dlc import YoutubeDL
+from youtube_dlc.compat import compat_http_server, compat_urllib_request
import ssl
import threading
diff --git a/test/test_iqiyi_sdk_interpreter.py b/test/test_iqiyi_sdk_interpreter.py
index 789059dbe..303609baa 100644
--- a/test/test_iqiyi_sdk_interpreter.py
+++ b/test/test_iqiyi_sdk_interpreter.py
@@ -9,7 +9,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
-from youtube_dl.extractor import IqiyiIE
+from youtube_dlc.extractor import IqiyiIE
class IqiyiIEWithCredentials(IqiyiIE):
diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py
index c24b8ca74..97fc8d5aa 100644
--- a/test/test_jsinterp.py
+++ b/test/test_jsinterp.py
@@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.jsinterp import JSInterpreter
+from youtube_dlc.jsinterp import JSInterpreter
class TestJSInterpreter(unittest.TestCase):
diff --git a/test/test_netrc.py b/test/test_netrc.py
index 7cf3a6a2e..566ba37a6 100644
--- a/test/test_netrc.py
+++ b/test/test_netrc.py
@@ -7,7 +7,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.extractor import (
+from youtube_dlc.extractor import (
gen_extractors,
)
diff --git a/test/test_options.py b/test/test_options.py
index 3a25a6ba3..dce253373 100644
--- a/test/test_options.py
+++ b/test/test_options.py
@@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.options import _hide_login_info
+from youtube_dlc.options import _hide_login_info
class TestOptions(unittest.TestCase):
diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py
index 4209d1d9a..6f538a3da 100644
--- a/test/test_postprocessors.py
+++ b/test/test_postprocessors.py
@@ -8,7 +8,7 @@ import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.postprocessor import MetadataFromTitlePP
+from youtube_dlc.postprocessor import MetadataFromTitlePP
class TestMetadataFromTitle(unittest.TestCase):
diff --git a/test/test_socks.py b/test/test_socks.py
index 1e68eb0da..be52e2343 100644
--- a/test/test_socks.py
+++ b/test/test_socks.py
@@ -15,7 +15,7 @@ from test.helper import (
FakeYDL,
get_params,
)
-from youtube_dl.compat import (
+from youtube_dlc.compat import (
compat_str,
compat_urllib_request,
)
diff --git a/test/test_subtitles.py b/test/test_subtitles.py
index 7d57a628e..3ca03fb6f 100644
--- a/test/test_subtitles.py
+++ b/test/test_subtitles.py
@@ -10,7 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, md5
-from youtube_dl.extractor import (
+from youtube_dlc.extractor import (
YoutubeIE,
DailymotionIE,
TEDIE,
@@ -26,7 +26,6 @@ from youtube_dl.extractor import (
ThePlatformIE,
ThePlatformFeedIE,
RTVEALaCartaIE,
- FunnyOrDieIE,
DemocracynowIE,
)
@@ -322,18 +321,6 @@ class TestRtveSubtitles(BaseTestSubtitles):
self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca')
-class TestFunnyOrDieSubtitles(BaseTestSubtitles):
- url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine'
- IE = FunnyOrDieIE
-
- def test_allsubtitles(self):
- self.DL.params['writesubtitles'] = True
- self.DL.params['allsubtitles'] = True
- subtitles = self.getSubtitles()
- self.assertEqual(set(subtitles.keys()), set(['en']))
- self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4')
-
-
class TestDemocracynowSubtitles(BaseTestSubtitles):
url = 'http://www.democracynow.org/shows/2015/7/3'
IE = DemocracynowIE
diff --git a/test/test_swfinterp.py b/test/test_swfinterp.py
index 9f18055e6..1a8b353e8 100644
--- a/test/test_swfinterp.py
+++ b/test/test_swfinterp.py
@@ -14,7 +14,7 @@ import json
import re
import subprocess
-from youtube_dl.swfinterp import SWFInterpreter
+from youtube_dlc.swfinterp import SWFInterpreter
TEST_DIR = os.path.join(
diff --git a/test/test_update.py b/test/test_update.py
index d9c71511d..1b144c43c 100644
--- a/test/test_update.py
+++ b/test/test_update.py
@@ -10,7 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import json
-from youtube_dl.update import rsa_verify
+from youtube_dlc.update import rsa_verify
class TestUpdate(unittest.TestCase):
diff --git a/test/test_utils.py b/test/test_utils.py
index 659c6ece5..5914d4fd6 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -15,10 +15,11 @@ import io
import json
import xml.etree.ElementTree
-from youtube_dl.utils import (
+from youtube_dlc.utils import (
age_restricted,
args_to_str,
encode_base_n,
+ caesar,
clean_html,
date_from_str,
DateRange,
@@ -69,11 +70,13 @@ from youtube_dl.utils import (
remove_start,
remove_end,
remove_quotes,
+ rot47,
shell_quote,
smuggle_url,
str_to_int,
strip_jsonp,
strip_or_none,
+ subtitles_filename,
timeconvert,
unescapeHTML,
unified_strdate,
@@ -102,7 +105,7 @@ from youtube_dl.utils import (
cli_bool_option,
parse_codecs,
)
-from youtube_dl.compat import (
+from youtube_dlc.compat import (
compat_chr,
compat_etree_fromstring,
compat_getenv,
@@ -237,12 +240,12 @@ class TestUtil(unittest.TestCase):
def env(var):
return '%{0}%'.format(var) if sys.platform == 'win32' else '${0}'.format(var)
- compat_setenv('YOUTUBE_DL_EXPATH_PATH', 'expanded')
- self.assertEqual(expand_path(env('YOUTUBE_DL_EXPATH_PATH')), 'expanded')
+ compat_setenv('youtube_dlc_EXPATH_PATH', 'expanded')
+ self.assertEqual(expand_path(env('youtube_dlc_EXPATH_PATH')), 'expanded')
self.assertEqual(expand_path(env('HOME')), compat_getenv('HOME'))
self.assertEqual(expand_path('~'), compat_getenv('HOME'))
self.assertEqual(
- expand_path('~/%s' % env('YOUTUBE_DL_EXPATH_PATH')),
+ expand_path('~/%s' % env('youtube_dlc_EXPATH_PATH')),
'%s/expanded' % compat_getenv('HOME'))
def test_prepend_extension(self):
@@ -261,6 +264,11 @@ class TestUtil(unittest.TestCase):
self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')
+ def test_subtitles_filename(self):
+ self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
+ self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
+ self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
+
def test_remove_start(self):
self.assertEqual(remove_start(None, 'A - '), None)
self.assertEqual(remove_start('A - B', 'A - '), 'B')
@@ -334,6 +342,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
+ self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
+ self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
def test_unified_timestamps(self):
self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
@@ -489,6 +499,12 @@ class TestUtil(unittest.TestCase):
def test_str_to_int(self):
self.assertEqual(str_to_int('123,456'), 123456)
self.assertEqual(str_to_int('123.456'), 123456)
+ self.assertEqual(str_to_int(523), 523)
+ # Python 3 has no long
+ if sys.version_info < (3, 0):
+ eval('self.assertEqual(str_to_int(123456L), 123456)')
+ self.assertEqual(str_to_int('noninteger'), None)
+ self.assertEqual(str_to_int([]), None)
def test_url_basename(self):
self.assertEqual(url_basename('http://foo.de/'), '')
@@ -1361,6 +1377,20 @@ Line 1
self.assertRaises(ValueError, encode_base_n, 0, 70)
self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table)
+ def test_caesar(self):
+ self.assertEqual(caesar('ace', 'abcdef', 2), 'cea')
+ self.assertEqual(caesar('cea', 'abcdef', -2), 'ace')
+ self.assertEqual(caesar('ace', 'abcdef', -2), 'eac')
+ self.assertEqual(caesar('eac', 'abcdef', 2), 'ace')
+ self.assertEqual(caesar('ace', 'abcdef', 0), 'ace')
+ self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz')
+ self.assertEqual(caesar('abc', 'acegik', 2), 'ebg')
+ self.assertEqual(caesar('ebg', 'acegik', -2), 'abc')
+
+ def test_rot47(self):
+ self.assertEqual(rot47('youtube-dlc'), r'J@FEF36\5=4')
+ self.assertEqual(rot47('YOUTUBE-DLC'), r'*~&%&qt\s{r')
+
def test_urshift(self):
self.assertEqual(urshift(3, 1), 1)
self.assertEqual(urshift(-3, 1), 2147483646)
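
The expected values above pin down caesar() as a substitution over an explicit alphabet (characters outside the alphabet pass through unchanged) and rot47() as the classic rotation of the 94 printable ASCII characters by 47. A reference sketch consistent with these tests:

    def caesar(s, alphabet, shift):
        # rotate characters that occur in `alphabet` by `shift` positions;
        # anything else (e.g. 'xyz' against 'abcdef') passes through as-is
        l = len(alphabet)
        return ''.join(
            alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
            for c in s)

    def rot47(s):
        # ROT47: caesar over the printable ASCII range 33..126
        return caesar(s, ''.join(map(chr, range(33, 127))), 47)

    assert rot47('youtube-dlc') == r'J@FEF36\5=4'
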
diff --git a/test/test_verbose_output.py b/test/test_verbose_output.py
index c1465fe8c..462f25e03 100644
--- a/test/test_verbose_output.py
+++ b/test/test_verbose_output.py
@@ -17,7 +17,7 @@ class TestVerboseOutput(unittest.TestCase):
def test_private_info_arg(self):
outp = subprocess.Popen(
[
- sys.executable, 'youtube_dl/__main__.py', '-v',
+ sys.executable, 'youtube_dlc/__main__.py', '-v',
'--username', 'johnsmith@gmail.com',
'--password', 'secret',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -30,7 +30,7 @@ class TestVerboseOutput(unittest.TestCase):
def test_private_info_shortarg(self):
outp = subprocess.Popen(
[
- sys.executable, 'youtube_dl/__main__.py', '-v',
+ sys.executable, 'youtube_dlc/__main__.py', '-v',
'-u', 'johnsmith@gmail.com',
'-p', 'secret',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -43,7 +43,7 @@ class TestVerboseOutput(unittest.TestCase):
def test_private_info_eq(self):
outp = subprocess.Popen(
[
- sys.executable, 'youtube_dl/__main__.py', '-v',
+ sys.executable, 'youtube_dlc/__main__.py', '-v',
'--username=johnsmith@gmail.com',
'--password=secret',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
@@ -56,7 +56,7 @@ class TestVerboseOutput(unittest.TestCase):
def test_private_info_shortarg_eq(self):
outp = subprocess.Popen(
[
- sys.executable, 'youtube_dl/__main__.py', '-v',
+ sys.executable, 'youtube_dlc/__main__.py', '-v',
'-u=johnsmith@gmail.com',
'-p=secret',
], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
diff --git a/test/test_write_annotations.py b/test/test_write_annotations.py
index 41abdfe3b..d98c96c15 100644
--- a/test/test_write_annotations.py
+++ b/test/test_write_annotations.py
@@ -15,11 +15,11 @@ import io
import xml.etree.ElementTree
-import youtube_dl.YoutubeDL
-import youtube_dl.extractor
+import youtube_dlc.YoutubeDL
+import youtube_dlc.extractor
-class YoutubeDL(youtube_dl.YoutubeDL):
+class YoutubeDL(youtube_dlc.YoutubeDL):
def __init__(self, *args, **kwargs):
super(YoutubeDL, self).__init__(*args, **kwargs)
self.to_stderr = self.to_screen
@@ -45,7 +45,7 @@ class TestAnnotations(unittest.TestCase):
def test_info_json(self):
expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text.
- ie = youtube_dl.extractor.YoutubeIE()
+ ie = youtube_dlc.extractor.YoutubeIE()
ydl = YoutubeDL(params)
ydl.add_info_extractor(ie)
ydl.download([TEST_ID])
diff --git a/test/test_youtube_chapters.py b/test/test_youtube_chapters.py
index 324ca8525..4529d2e84 100644
--- a/test/test_youtube_chapters.py
+++ b/test/test_youtube_chapters.py
@@ -9,7 +9,7 @@ import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import expect_value
-from youtube_dl.extractor import YoutubeIE
+from youtube_dlc.extractor import YoutubeIE
class TestYoutubeChapters(unittest.TestCase):
@@ -267,7 +267,7 @@ class TestYoutubeChapters(unittest.TestCase):
for description, duration, expected_chapters in self._TEST_CASES:
ie = YoutubeIE()
expect_value(
- self, ie._extract_chapters(description, duration),
+ self, ie._extract_chapters_from_description(description, duration),
expected_chapters, None)
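
The chapter extractor was renamed to _extract_chapters_from_description, reflecting that these chapters are parsed from timestamped lines in the video description. The general shape of that parsing is sketched below (purely hypothetical; the function name and regex are illustrative, not the extractor's actual code):

    import re

    def chapters_from_description(description, duration):
        # hypothetical: lines like "12:34 Title" become chapter dicts
        chapters = []
        for ts, title in re.findall(
                r'(?m)^(\d{1,2}(?::\d{1,2}){1,2})\s+(.+)$', description or ''):
            seconds = 0
            for part in ts.split(':'):
                seconds = seconds * 60 + int(part)
            chapters.append({'start_time': seconds, 'title': title.strip()})
        for i, chapter in enumerate(chapters):
            # each chapter ends where the next begins; the last ends at
            # the video duration
            chapter['end_time'] = (
                chapters[i + 1]['start_time'] if i + 1 < len(chapters)
                else duration)
        return chapters
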
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index c4f0abbea..a693963ef 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -10,7 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL
-from youtube_dl.extractor import (
+from youtube_dlc.extractor import (
YoutubePlaylistIE,
YoutubeIE,
)
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py
index f0c370eee..a54b36198 100644
--- a/test/test_youtube_signature.py
+++ b/test/test_youtube_signature.py
@@ -13,8 +13,8 @@ import re
import string
from test.helper import FakeYDL
-from youtube_dl.extractor import YoutubeIE
-from youtube_dl.compat import compat_str, compat_urlretrieve
+from youtube_dlc.extractor import YoutubeIE
+from youtube_dlc.compat import compat_str, compat_urlretrieve
_TESTS = [
(
@@ -74,6 +74,28 @@ _TESTS = [
]
+class TestPlayerInfo(unittest.TestCase):
+ def test_youtube_extract_player_info(self):
+ PLAYER_URLS = (
+ ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
+ # obsolete
+ ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
+ ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
+ ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
+ ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
+ ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
+ ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
+ ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
+ ('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
+ ('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
+ )
+ for player_url, expected_player_id in PLAYER_URLS:
+ expected_player_type = player_url.split('.')[-1]
+ player_type, player_id = YoutubeIE._extract_player_info(player_url)
+ self.assertEqual(player_type, expected_player_type)
+ self.assertEqual(player_id, expected_player_id)
+
+
class TestSignature(unittest.TestCase):
def setUp(self):
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
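
The new TestPlayerInfo cases enumerate every historical player URL layout and expect _extract_player_info to return a (type, id) pair. The mapping those URLs imply can be reproduced with two patterns (hypothetical regexes written only to satisfy these test URLs, not the extractor's own):

    import re

    _PLAYER_RES = (
        # modern layout: /s/player/<id>/player_ias.vflset/<lang>/base.js
        # (tried first: the legacy pattern below would also hit 'vflset')
        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset/[^/]+/base\.js$',
        # legacy layouts: the id is the vfl... token in the path/filename
        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*\.(?:js|swf)$',
    )

    def extract_player_info(player_url):
        for pattern in _PLAYER_RES:
            mobj = re.search(pattern, player_url)
            if mobj:
                # the player type is just the URL's extension: 'js' or 'swf'
                return player_url.split('.')[-1], mobj.group('id')
        raise ValueError('cannot identify player %r' % player_url)
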
diff --git a/test/testdata/cookies/malformed_cookies.txt b/test/testdata/cookies/malformed_cookies.txt
new file mode 100644
index 000000000..17bc40354
--- /dev/null
+++ b/test/testdata/cookies/malformed_cookies.txt
@@ -0,0 +1,9 @@
+# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+
+# Cookie file entry with invalid number of fields - 6 instead of 7
+www.foobar.foobar FALSE / FALSE 0 COOKIE
+
+# Cookie file entry with invalid expires at
+www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE
diff --git a/tox.ini b/tox.ini
index 9c4e4a3d1..842091d65 100644
--- a/tox.ini
+++ b/tox.ini
@@ -10,5 +10,5 @@ defaultargs = test --exclude test_download.py --exclude test_age_restriction.py
--exclude test_subtitles.py --exclude test_write_annotations.py
--exclude test_youtube_lists.py --exclude test_iqiyi_sdk_interpreter.py
--exclude test_socks.py
-commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dl --cover-html
+commands = nosetests --verbose {posargs:{[testenv]defaultargs}} # --with-coverage --cover-package=youtube_dlc --cover-html
# test.test_download:TestDownload.test_NowVideo
diff --git a/youtube-dl.plugin.zsh b/youtube-dl.plugin.zsh
deleted file mode 100644
index 17ab1341a..000000000
--- a/youtube-dl.plugin.zsh
+++ /dev/null
@@ -1,24 +0,0 @@
-# This allows the youtube-dl command to be installed in ZSH using antigen.
-# Antigen is a bundle manager. It allows you to enhance the functionality of
-# your zsh session by installing bundles and themes easily.
-
-# Antigen documentation:
-# http://antigen.sharats.me/
-# https://github.com/zsh-users/antigen
-
-# Install youtube-dl:
-# antigen bundle ytdl-org/youtube-dl
-# Bundles installed by antigen are available for use immediately.
-
-# Update youtube-dl (and all other antigen bundles):
-# antigen update
-
-# The antigen command will download the git repository to a folder and then
-# execute an enabling script (this file). The complete process for loading the
-# code is documented here:
-# https://github.com/zsh-users/antigen#notes-on-writing-plugins
-
-# This specific script just aliases youtube-dl to the python script that this
-# library provides. This requires updating the PYTHONPATH to ensure that the
-# full set of code can be located.
-alias youtube-dl="PYTHONPATH=$(dirname $0) $(dirname $0)/bin/youtube-dl"
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
deleted file mode 100755
index c3d1407f9..000000000
--- a/youtube_dl/YoutubeDL.py
+++ /dev/null
@@ -1,2417 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-from __future__ import absolute_import, unicode_literals
-
-import collections
-import contextlib
-import copy
-import datetime
-import errno
-import fileinput
-import io
-import itertools
-import json
-import locale
-import operator
-import os
-import platform
-import re
-import shutil
-import subprocess
-import socket
-import sys
-import time
-import tokenize
-import traceback
-import random
-
-from string import ascii_letters
-
-from .compat import (
- compat_basestring,
- compat_cookiejar,
- compat_get_terminal_size,
- compat_http_client,
- compat_kwargs,
- compat_numeric_types,
- compat_os_name,
- compat_str,
- compat_tokenize_tokenize,
- compat_urllib_error,
- compat_urllib_request,
- compat_urllib_request_DataHandler,
-)
-from .utils import (
- age_restricted,
- args_to_str,
- ContentTooShortError,
- date_from_str,
- DateRange,
- DEFAULT_OUTTMPL,
- determine_ext,
- determine_protocol,
- DownloadError,
- encode_compat_str,
- encodeFilename,
- error_to_compat_str,
- expand_path,
- ExtractorError,
- format_bytes,
- formatSeconds,
- GeoRestrictedError,
- int_or_none,
- ISO3166Utils,
- locked_file,
- make_HTTPS_handler,
- MaxDownloadsReached,
- orderedSet,
- PagedList,
- parse_filesize,
- PerRequestProxyHandler,
- platform_name,
- PostProcessingError,
- preferredencoding,
- prepend_extension,
- register_socks_protocols,
- render_table,
- replace_extension,
- SameFileError,
- sanitize_filename,
- sanitize_path,
- sanitize_url,
- sanitized_Request,
- std_headers,
- str_or_none,
- subtitles_filename,
- UnavailableVideoError,
- url_basename,
- version_tuple,
- write_json_file,
- write_string,
- YoutubeDLCookieJar,
- YoutubeDLCookieProcessor,
- YoutubeDLHandler,
-)
-from .cache import Cache
-from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
-from .extractor.openload import PhantomJSwrapper
-from .downloader import get_suitable_downloader
-from .downloader.rtmp import rtmpdump_version
-from .postprocessor import (
- FFmpegFixupM3u8PP,
- FFmpegFixupM4aPP,
- FFmpegFixupStretchedPP,
- FFmpegMergerPP,
- FFmpegPostProcessor,
- get_postprocessor,
-)
-from .version import __version__
-
-if compat_os_name == 'nt':
- import ctypes
-
-
-class YoutubeDL(object):
- """YoutubeDL class.
-
- YoutubeDL objects are the ones responsible of downloading the
- actual video file and writing it to disk if the user has requested
- it, among some other tasks. In most cases there should be one per
- program. As, given a video URL, the downloader doesn't know how to
- extract all the needed information, task that InfoExtractors do, it
- has to pass the URL to one of them.
-
- For this, YoutubeDL objects have a method that allows
- InfoExtractors to be registered in a given order. When it is passed
- a URL, the YoutubeDL object handles it to the first InfoExtractor it
- finds that reports being able to handle it. The InfoExtractor extracts
- all the information about the video or videos the URL refers to, and
- YoutubeDL process the extracted information, possibly using a File
- Downloader to download the video.
-
- YoutubeDL objects accept a lot of parameters. In order not to saturate
- the object constructor with arguments, it receives a dictionary of
- options instead. These options are available through the params
- attribute for the InfoExtractors to use. The YoutubeDL also
- registers itself as the downloader in charge for the InfoExtractors
- that are added to it, so this is a "mutual registration".
-
- Available options:
-
- username: Username for authentication purposes.
- password: Password for authentication purposes.
- videopassword: Password for accessing a video.
- ap_mso: Adobe Pass multiple-system operator identifier.
- ap_username: Multiple-system operator account username.
- ap_password: Multiple-system operator account password.
- usenetrc: Use netrc for authentication instead.
- verbose: Print additional info to stdout.
- quiet: Do not print messages to stdout.
- no_warnings: Do not print out anything for warnings.
- forceurl: Force printing final URL.
- forcetitle: Force printing title.
- forceid: Force printing ID.
- forcethumbnail: Force printing thumbnail URL.
- forcedescription: Force printing description.
- forcefilename: Force printing final filename.
- forceduration: Force printing duration.
- forcejson: Force printing info_dict as JSON.
- dump_single_json: Force printing the info_dict of the whole playlist
- (or video) as a single JSON line.
- simulate: Do not download the video files.
- format: Video format code. See options.py for more information.
- outtmpl: Template for output names.
- restrictfilenames: Do not allow "&" and spaces in file names
- ignoreerrors: Do not stop on download errors.
- force_generic_extractor: Force downloader to use the generic extractor
- nooverwrites: Prevent overwriting files.
- playliststart: Playlist item to start at.
- playlistend: Playlist item to end at.
- playlist_items: Specific indices of playlist to download.
- playlistreverse: Download playlist items in reverse order.
- playlistrandom: Download playlist items in random order.
- matchtitle: Download only matching titles.
- rejecttitle: Reject downloads for matching titles.
- logger: Log messages to a logging.Logger instance.
- logtostderr: Log messages to stderr instead of stdout.
- writedescription: Write the video description to a .description file
- writeinfojson: Write the video description to a .info.json file
- writeannotations: Write the video annotations to a .annotations.xml file
- writethumbnail: Write the thumbnail image to a file
- write_all_thumbnails: Write all thumbnail formats to files
- writesubtitles: Write the video subtitles to a file
- writeautomaticsub: Write the automatically generated subtitles to a file
- allsubtitles: Downloads all the subtitles of the video
- (requires writesubtitles or writeautomaticsub)
- listsubtitles: Lists all available subtitles for the video
- subtitlesformat: The format code for subtitles
- subtitleslangs: List of languages of the subtitles to download
- keepvideo: Keep the video file after post-processing
- daterange: A DateRange object, download only if the upload_date is in the range.
- skip_download: Skip the actual download of the video file
- cachedir: Location of the cache files in the filesystem.
- False to disable filesystem cache.
- noplaylist: Download single video instead of a playlist if in doubt.
- age_limit: An integer representing the user's age in years.
- Unsuitable videos for the given age are skipped.
- min_views: An integer representing the minimum view count the video
- must have in order to not be skipped.
- Videos without view count information are always
- downloaded. None for no limit.
- max_views: An integer representing the maximum view count.
- Videos that are more popular than that are not
- downloaded.
- Videos without view count information are always
- downloaded. None for no limit.
- download_archive: File name of a file where all downloads are recorded.
- Videos already present in the file are not downloaded
- again.
- cookiefile: File name where cookies should be read from and dumped to.
- nocheckcertificate:Do not verify SSL certificates
- prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
- At the moment, this is only supported by YouTube.
- proxy: URL of the proxy server to use
- geo_verification_proxy: URL of the proxy to use for IP address verification
- on geo-restricted sites.
- socket_timeout: Time to wait for unresponsive hosts, in seconds
- bidi_workaround: Work around buggy terminals without bidirectional text
- support, using fridibi
- debug_printtraffic:Print out sent and received HTTP traffic
- include_ads: Download ads as well
- default_search: Prepend this string if an input url is not valid.
- 'auto' for elaborate guessing
- encoding: Use this encoding instead of the system-specified.
- extract_flat: Do not resolve URLs, return the immediate result.
- Pass in 'in_playlist' to only show this behavior for
- playlist items.
- postprocessors: A list of dictionaries, each with an entry
- * key: The name of the postprocessor. See
- youtube_dl/postprocessor/__init__.py for a list.
- as well as any further keyword arguments for the
- postprocessor.
- progress_hooks: A list of functions that get called on download
- progress, with a dictionary with the entries
- * status: One of "downloading", "error", or "finished".
- Check this first and ignore unknown values.
-
- If status is one of "downloading", or "finished", the
- following properties may also be present:
- * filename: The final filename (always present)
- * tmpfilename: The filename we're currently writing to
- * downloaded_bytes: Bytes on disk
- * total_bytes: Size of the whole file, None if unknown
- * total_bytes_estimate: Guess of the eventual file size,
- None if unavailable.
- * elapsed: The number of seconds since download started.
- * eta: The estimated time in seconds, None if unknown
- * speed: The download speed in bytes/second, None if
- unknown
- * fragment_index: The counter of the currently
- downloaded video fragment.
- * fragment_count: The number of fragments (= individual
- files that will be merged)
-
- Progress hooks are guaranteed to be called at least once
- (with status "finished") if the download is successful.
- merge_output_format: Extension to use when merging formats.
- fixup: Automatically correct known faults of the file.
- One of:
- - "never": do nothing
- - "warn": only emit a warning
- - "detect_or_warn": check whether we can do anything
- about it, warn otherwise (default)
- source_address: Client-side IP address to bind to.
- call_home: Boolean, true iff we are allowed to contact the
- youtube-dl servers for debugging.
- sleep_interval: Number of seconds to sleep before each download when
- used alone or a lower bound of a range for randomized
- sleep before each download (minimum possible number
- of seconds to sleep) when used along with
- max_sleep_interval.
- max_sleep_interval:Upper bound of a range for randomized sleep before each
- download (maximum possible number of seconds to sleep).
- Must only be used along with sleep_interval.
- Actual sleep time will be a random float from range
- [sleep_interval; max_sleep_interval].
- listformats: Print an overview of available video formats and exit.
- list_thumbnails: Print a table of all thumbnails and exit.
- match_filter: A function that gets called with the info_dict of
- every video.
- If it returns a message, the video is ignored.
- If it returns None, the video is downloaded.
- match_filter_func in utils.py is one example for this.
- no_color: Do not emit color codes in output.
- geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
- HTTP header
- geo_bypass_country:
- Two-letter ISO 3166-2 country code that will be used for
- explicit geographic restriction bypassing via faking
- X-Forwarded-For HTTP header
- geo_bypass_ip_block:
- IP range in CIDR notation that will be used similarly to
- geo_bypass_country
-
- The following options determine which downloader is picked:
- external_downloader: Executable of the external downloader to call.
- None or unset for standard (built-in) downloader.
- hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
- if True, otherwise use ffmpeg/avconv if False, otherwise
- use downloader suggested by extractor if None.
-
- The following parameters are not used by YoutubeDL itself, they are used by
- the downloader (see youtube_dl/downloader/common.py):
- nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
- noresizebuffer, retries, continuedl, noprogress, consoletitle,
- xattr_set_filesize, external_downloader_args, hls_use_mpegts,
- http_chunk_size.
-
- The following options are used by the post processors:
- prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg.
- ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
- to the binary or its containing directory.
- postprocessor_args: A list of additional command-line arguments for the
- postprocessor.
-
- The following options are used by the Youtube extractor:
- youtube_include_dash_manifest: If True (default), DASH manifests and related
- data will be downloaded and processed by extractor.
- You can reduce network I/O by disabling it if you don't
- care about DASH.
- """
-
- _NUMERIC_FIELDS = set((
- 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
- 'timestamp', 'upload_year', 'upload_month', 'upload_day',
- 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
- 'average_rating', 'comment_count', 'age_limit',
- 'start_time', 'end_time',
- 'chapter_number', 'season_number', 'episode_number',
- 'track_number', 'disc_number', 'release_year',
- 'playlist_index',
- ))
-
- params = None
- _ies = []
- _pps = []
- _download_retcode = None
- _num_downloads = None
- _screen_file = None
-
- def __init__(self, params=None, auto_init=True):
- """Create a FileDownloader object with the given options."""
- if params is None:
- params = {}
- self._ies = []
- self._ies_instances = {}
- self._pps = []
- self._progress_hooks = []
- self._download_retcode = 0
- self._num_downloads = 0
- self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
- self._err_file = sys.stderr
- self.params = {
- # Default parameters
- 'nocheckcertificate': False,
- }
- self.params.update(params)
- self.cache = Cache(self)
-
- def check_deprecated(param, option, suggestion):
- if self.params.get(param) is not None:
- self.report_warning(
- '%s is deprecated. Use %s instead.' % (option, suggestion))
- return True
- return False
-
- if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
- if self.params.get('geo_verification_proxy') is None:
- self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
-
- check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
- check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
- check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
-
- if params.get('bidi_workaround', False):
- try:
- import pty
- master, slave = pty.openpty()
- width = compat_get_terminal_size().columns
- if width is None:
- width_args = []
- else:
- width_args = ['-w', str(width)]
- sp_kwargs = dict(
- stdin=subprocess.PIPE,
- stdout=slave,
- stderr=self._err_file)
- try:
- self._output_process = subprocess.Popen(
- ['bidiv'] + width_args, **sp_kwargs
- )
- except OSError:
- self._output_process = subprocess.Popen(
- ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
- self._output_channel = os.fdopen(master, 'rb')
- except OSError as ose:
- if ose.errno == errno.ENOENT:
- self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
- else:
- raise
-
- if (sys.platform != 'win32'
- and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
- and not params.get('restrictfilenames', False)):
- # Unicode filesystem API will throw errors (#1474, #13027)
- self.report_warning(
- 'Assuming --restrict-filenames since file system encoding '
- 'cannot encode all characters. '
- 'Set the LC_ALL environment variable to fix this.')
- self.params['restrictfilenames'] = True
-
- if isinstance(params.get('outtmpl'), bytes):
- self.report_warning(
- 'Parameter outtmpl is bytes, but should be a unicode string. '
- 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
-
- self._setup_opener()
-
- if auto_init:
- self.print_debug_header()
- self.add_default_info_extractors()
-
- for pp_def_raw in self.params.get('postprocessors', []):
- pp_class = get_postprocessor(pp_def_raw['key'])
- pp_def = dict(pp_def_raw)
- del pp_def['key']
- pp = pp_class(self, **compat_kwargs(pp_def))
- self.add_post_processor(pp)
-
- for ph in self.params.get('progress_hooks', []):
- self.add_progress_hook(ph)
-
- register_socks_protocols()
-
- def warn_if_short_id(self, argv):
- # short YouTube ID starting with dash?
- idxs = [
- i for i, a in enumerate(argv)
- if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
- if idxs:
- correct_argv = (
- ['youtube-dl']
- + [a for i, a in enumerate(argv) if i not in idxs]
- + ['--'] + [argv[i] for i in idxs]
- )
- self.report_warning(
- 'Long argument string detected. '
- 'Use -- to separate parameters and URLs, like this:\n%s\n' %
- args_to_str(correct_argv))
-
- def add_info_extractor(self, ie):
- """Add an InfoExtractor object to the end of the list."""
- self._ies.append(ie)
- if not isinstance(ie, type):
- self._ies_instances[ie.ie_key()] = ie
- ie.set_downloader(self)
-
- def get_info_extractor(self, ie_key):
- """
- Get an instance of an IE with name ie_key, it will try to get one from
- the _ies list, if there's no instance it will create a new one and add
- it to the extractor list.
- """
- ie = self._ies_instances.get(ie_key)
- if ie is None:
- ie = get_info_extractor(ie_key)()
- self.add_info_extractor(ie)
- return ie
-
- def add_default_info_extractors(self):
- """
- Add the InfoExtractors returned by gen_extractors to the end of the list
- """
- for ie in gen_extractor_classes():
- self.add_info_extractor(ie)
-
- def add_post_processor(self, pp):
- """Add a PostProcessor object to the end of the chain."""
- self._pps.append(pp)
- pp.set_downloader(self)
-
- def add_progress_hook(self, ph):
- """Add the progress hook (currently only for the file downloader)"""
- self._progress_hooks.append(ph)
-
- def _bidi_workaround(self, message):
- if not hasattr(self, '_output_channel'):
- return message
-
- assert hasattr(self, '_output_process')
- assert isinstance(message, compat_str)
- line_count = message.count('\n') + 1
- self._output_process.stdin.write((message + '\n').encode('utf-8'))
- self._output_process.stdin.flush()
- res = ''.join(self._output_channel.readline().decode('utf-8')
- for _ in range(line_count))
- return res[:-len('\n')]
-
- def to_screen(self, message, skip_eol=False):
- """Print message to stdout if not in quiet mode."""
- return self.to_stdout(message, skip_eol, check_quiet=True)
-
- def _write_string(self, s, out=None):
- write_string(s, out=out, encoding=self.params.get('encoding'))
-
- def to_stdout(self, message, skip_eol=False, check_quiet=False):
- """Print message to stdout if not in quiet mode."""
- if self.params.get('logger'):
- self.params['logger'].debug(message)
- elif not check_quiet or not self.params.get('quiet', False):
- message = self._bidi_workaround(message)
- terminator = ['\n', ''][skip_eol]
- output = message + terminator
-
- self._write_string(output, self._screen_file)
-
- def to_stderr(self, message):
- """Print message to stderr."""
- assert isinstance(message, compat_str)
- if self.params.get('logger'):
- self.params['logger'].error(message)
- else:
- message = self._bidi_workaround(message)
- output = message + '\n'
- self._write_string(output, self._err_file)
-
- def to_console_title(self, message):
- if not self.params.get('consoletitle', False):
- return
- if compat_os_name == 'nt':
- if ctypes.windll.kernel32.GetConsoleWindow():
- # c_wchar_p() might not be necessary if `message` is
- # already of type unicode()
- ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
- elif 'TERM' in os.environ:
- self._write_string('\033]0;%s\007' % message, self._screen_file)
-
- def save_console_title(self):
- if not self.params.get('consoletitle', False):
- return
- if self.params.get('simulate', False):
- return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Save the title on stack
- self._write_string('\033[22;0t', self._screen_file)
-
- def restore_console_title(self):
- if not self.params.get('consoletitle', False):
- return
- if self.params.get('simulate', False):
- return
- if compat_os_name != 'nt' and 'TERM' in os.environ:
- # Restore the title from stack
- self._write_string('\033[23;0t', self._screen_file)
-
- def __enter__(self):
- self.save_console_title()
- return self
-
- def __exit__(self, *args):
- self.restore_console_title()
-
- if self.params.get('cookiefile') is not None:
- self.cookiejar.save(ignore_discard=True, ignore_expires=True)
-
- def trouble(self, message=None, tb=None):
- """Determine action to take when a download problem appears.
-
- Depending on whether the downloader has been configured to ignore
- download errors or not, this method may raise an exception or simply
- set the return code when errors are found, after printing the message.
-
- tb, if given, is additional traceback information.
- """
- if message is not None:
- self.to_stderr(message)
- if self.params.get('verbose'):
- if tb is None:
- if sys.exc_info()[0]: # if .trouble has been called from an except block
- tb = ''
- if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
- tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
- tb += encode_compat_str(traceback.format_exc())
- else:
- tb_data = traceback.format_list(traceback.extract_stack())
- tb = ''.join(tb_data)
- self.to_stderr(tb)
- if not self.params.get('ignoreerrors', False):
- if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
- exc_info = sys.exc_info()[1].exc_info
- else:
- exc_info = sys.exc_info()
- raise DownloadError(message, exc_info)
- self._download_retcode = 1
-
- def report_warning(self, message):
- '''
- Print the message to stderr; it will be prefixed with 'WARNING:'.
- If stderr is a tty file, the 'WARNING:' will be colored.
- '''
- if self.params.get('logger') is not None:
- self.params['logger'].warning(message)
- else:
- if self.params.get('no_warnings'):
- return
- if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
- _msg_header = '\033[0;33mWARNING:\033[0m'
- else:
- _msg_header = 'WARNING:'
- warning_message = '%s %s' % (_msg_header, message)
- self.to_stderr(warning_message)
-
- def report_error(self, message, tb=None):
- '''
- Do the same as trouble, but prefix the message with 'ERROR:', colored
- in red if stderr is a tty file.
- '''
- if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
- _msg_header = '\033[0;31mERROR:\033[0m'
- else:
- _msg_header = 'ERROR:'
- error_message = '%s %s' % (_msg_header, message)
- self.trouble(error_message, tb)
-
- def report_file_already_downloaded(self, file_name):
- """Report file has already been fully downloaded."""
- try:
- self.to_screen('[download] %s has already been downloaded' % file_name)
- except UnicodeEncodeError:
- self.to_screen('[download] The file has already been downloaded')
-
- def prepare_filename(self, info_dict):
- """Generate the output filename."""
- try:
- template_dict = dict(info_dict)
-
- template_dict['epoch'] = int(time.time())
- autonumber_size = self.params.get('autonumber_size')
- if autonumber_size is None:
- autonumber_size = 5
- template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
- if template_dict.get('resolution') is None:
- if template_dict.get('width') and template_dict.get('height'):
- template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
- elif template_dict.get('height'):
- template_dict['resolution'] = '%sp' % template_dict['height']
- elif template_dict.get('width'):
- template_dict['resolution'] = '%dx?' % template_dict['width']
-
- sanitize = lambda k, v: sanitize_filename(
- compat_str(v),
- restricted=self.params.get('restrictfilenames'),
- is_id=(k == 'id' or k.endswith('_id')))
- template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
- for k, v in template_dict.items()
- if v is not None and not isinstance(v, (list, tuple, dict)))
- template_dict = collections.defaultdict(lambda: 'NA', template_dict)
-
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
-
- # For fields playlist_index and autonumber convert all occurrences
- # of %(field)s to %(field)0Nd for backward compatibility
- field_size_compat_map = {
- 'playlist_index': len(str(template_dict['n_entries'])),
- 'autonumber': autonumber_size,
- }
- FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
- mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
- if mobj:
- outtmpl = re.sub(
- FIELD_SIZE_COMPAT_RE,
- r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
- outtmpl)
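- # For instance, in a playlist with n_entries == 123 the template
- # '%(playlist_index)s' is rewritten here to '%(playlist_index)03d',
- # zero-padding the index to the playlist's width.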
-
- # Missing numeric fields used together with integer presentation types
- # in format specification will break the argument substitution since
- # string 'NA' is returned for missing fields. We will patch output
- # template for missing fields to meet string presentation type.
- for numeric_field in self._NUMERIC_FIELDS:
- if numeric_field not in template_dict:
- # As of [1] format syntax is:
- # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
- # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
- FORMAT_RE = r'''(?x)
- (?<!%)
- %
- \({0}\) # mapping key
- (?:[#0\-+ ]+)? # conversion flags (optional)
- (?:\d+)? # minimum field width (optional)
- (?:\.\d+)? # precision (optional)
- [hlL]? # length modifier (optional)
- [diouxXeEfFgGcrs%] # conversion type
- '''
- outtmpl = re.sub(
- FORMAT_RE.format(numeric_field),
- r'%({0})s'.format(numeric_field), outtmpl)
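- # For example, if 'view_count' is missing, a specifier such as
- # '%(view_count)05d' is relaxed to '%(view_count)s' so that the
- # 'NA' placeholder can be substituted without breaking.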
-
- # expand_path translates '%%' into '%' and '$$' into '$'
- # correspondingly; that is not what we want, since we need to keep
- # '%%' intact for the template dict substitution step. Work around
- # this with a boundary-like separator hack.
- sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
- outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
-
- # outtmpl should be expand_path'ed before template dict substitution
- # because meta fields may contain env variables we don't want to
- # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
- # title "Hello $PATH", we don't want `$PATH` to be expanded.
- filename = expand_path(outtmpl).replace(sep, '') % template_dict
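- # E.g. an outtmpl of '100%% - %(title)s.%(ext)s' survives expand_path()
- # with its literal '%%' intact and is only collapsed to a single '%'
- # by the template substitution above.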
-
- # Temporary fix for #4787
- # 'Treat' all problem characters by passing filename through preferredencoding
- # to workaround encoding issues with subprocess on python2 @ Windows
- if sys.version_info < (3, 0) and sys.platform == 'win32':
- filename = encodeFilename(filename, True).decode(preferredencoding())
- return sanitize_path(filename)
- except ValueError as err:
- self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
- return None
-
- def _match_entry(self, info_dict, incomplete):
- """ Returns None iff the file should be downloaded """
-
- video_title = info_dict.get('title', info_dict.get('id', 'video'))
- if 'title' in info_dict:
- # This can happen when we're just evaluating the playlist
- title = info_dict['title']
- matchtitle = self.params.get('matchtitle', False)
- if matchtitle:
- if not re.search(matchtitle, title, re.IGNORECASE):
- return '"' + title + '" title did not match pattern "' + matchtitle + '"'
- rejecttitle = self.params.get('rejecttitle', False)
- if rejecttitle:
- if re.search(rejecttitle, title, re.IGNORECASE):
- return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
- date = info_dict.get('upload_date')
- if date is not None:
- dateRange = self.params.get('daterange', DateRange())
- if date not in dateRange:
- return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
- view_count = info_dict.get('view_count')
- if view_count is not None:
- min_views = self.params.get('min_views')
- if min_views is not None and view_count < min_views:
- return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
- max_views = self.params.get('max_views')
- if max_views is not None and view_count > max_views:
- return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
- if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % video_title
- if self.in_download_archive(info_dict):
- return '%s has already been recorded in archive' % video_title
-
- if not incomplete:
- match_filter = self.params.get('match_filter')
- if match_filter is not None:
- ret = match_filter(info_dict)
- if ret is not None:
- return ret
-
- return None
-
- @staticmethod
- def add_extra_info(info_dict, extra_info):
- '''Set the keys from extra_info in info dict if they are missing'''
- for key, value in extra_info.items():
- info_dict.setdefault(key, value)
-
- def extract_info(self, url, download=True, ie_key=None, extra_info={},
- process=True, force_generic_extractor=False):
- '''
- Returns a list with a dictionary for each video we find.
- If 'download', also downloads the videos.
- extra_info is a dict containing the extra values to add to each result
- '''
-
- if not ie_key and force_generic_extractor:
- ie_key = 'Generic'
-
- if ie_key:
- ies = [self.get_info_extractor(ie_key)]
- else:
- ies = self._ies
-
- for ie in ies:
- if not ie.suitable(url):
- continue
-
- ie = self.get_info_extractor(ie.ie_key())
- if not ie.working():
- self.report_warning('The program functionality for this site has been marked as broken, '
- 'and will probably not work.')
-
- try:
- ie_result = ie.extract(url)
- if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
- break
- if isinstance(ie_result, list):
- # Backwards compatibility: old IE result format
- ie_result = {
- '_type': 'compat_list',
- 'entries': ie_result,
- }
- self.add_default_extra_info(ie_result, ie, url)
- if process:
- return self.process_ie_result(ie_result, download, extra_info)
- else:
- return ie_result
- except GeoRestrictedError as e:
- msg = e.msg
- if e.countries:
- msg += '\nThis video is available in %s.' % ', '.join(
- map(ISO3166Utils.short2full, e.countries))
- msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
- self.report_error(msg)
- break
- except ExtractorError as e: # An error we somewhat expected
- self.report_error(compat_str(e), e.format_traceback())
- break
- except MaxDownloadsReached:
- raise
- except Exception as e:
- if self.params.get('ignoreerrors', False):
- self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
- break
- else:
- raise
- else:
- self.report_error('no suitable InfoExtractor for URL %s' % url)
-
- def add_default_extra_info(self, ie_result, ie, url):
- self.add_extra_info(ie_result, {
- 'extractor': ie.IE_NAME,
- 'webpage_url': url,
- 'webpage_url_basename': url_basename(url),
- 'extractor_key': ie.ie_key(),
- })
-
- def process_ie_result(self, ie_result, download=True, extra_info={}):
- """
- Take the result of the ie (may be modified) and resolve all unresolved
- references (URLs, playlist items).
-
- It will also download the videos if 'download'.
- Returns the resolved ie_result.
- """
- result_type = ie_result.get('_type', 'video')
-
- if result_type in ('url', 'url_transparent'):
- ie_result['url'] = sanitize_url(ie_result['url'])
- extract_flat = self.params.get('extract_flat', False)
- if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
- or extract_flat is True):
- self.__forced_printings(
- ie_result, self.prepare_filename(ie_result),
- incomplete=True)
- return ie_result
-
- if result_type == 'video':
- self.add_extra_info(ie_result, extra_info)
- return self.process_video_result(ie_result, download=download)
- elif result_type == 'url':
- # We have to add extra_info to the results because it may be
- # contained in a playlist
- return self.extract_info(ie_result['url'],
- download,
- ie_key=ie_result.get('ie_key'),
- extra_info=extra_info)
- elif result_type == 'url_transparent':
- # Use the information from the embedding page
- info = self.extract_info(
- ie_result['url'], ie_key=ie_result.get('ie_key'),
- extra_info=extra_info, download=False, process=False)
-
- # extract_info may return None when ignoreerrors is enabled and
- # extraction failed with an error, don't crash and return early
- # in this case
- if not info:
- return info
-
- force_properties = dict(
- (k, v) for k, v in ie_result.items() if v is not None)
- for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
- if f in force_properties:
- del force_properties[f]
- new_result = info.copy()
- new_result.update(force_properties)
-
- # Extracted info may not be a video result (i.e.
- # info.get('_type', 'video') != 'video') but rather a URL or
- # url_transparent. In such cases outer metadata (from ie_result)
- # should be propagated to inner one (info). For this to happen
- # _type of info should be overridden with url_transparent. This
- # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
- if new_result.get('_type') == 'url':
- new_result['_type'] = 'url_transparent'
-
- return self.process_ie_result(
- new_result, download=download, extra_info=extra_info)
- elif result_type in ('playlist', 'multi_video'):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1) - 1
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
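- # E.g. --playlist-items '1-3,7,5-6' expands to [1, 2, 3, 7, 5, 6];
- # orderedSet drops duplicates while keeping this order.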
-
- ie_entries = ie_result['entries']
-
- def make_playlistitems_entries(list_ie_entries):
- num_entries = len(list_ie_entries)
- return [
- list_ie_entries[i - 1] for i in playlistitems
- if -num_entries <= i - 1 < num_entries]
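- # Item numbers are 1-based; out-of-range items are silently dropped
- # by the bounds check above.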
-
- def report_download(num_entries):
- self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, num_entries))
-
- if isinstance(ie_entries, list):
- n_all_entries = len(ie_entries)
- if playlistitems:
- entries = make_playlistitems_entries(ie_entries)
- else:
- entries = ie_entries[playliststart:playlistend]
- n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
- (ie_result['extractor'], playlist, n_all_entries, n_entries))
- elif isinstance(ie_entries, PagedList):
- if playlistitems:
- entries = []
- for item in playlistitems:
- entries.extend(ie_entries.getslice(
- item - 1, item
- ))
- else:
- entries = ie_entries.getslice(
- playliststart, playlistend)
- n_entries = len(entries)
- report_download(n_entries)
- else: # iterable
- if playlistitems:
- entries = make_playlistitems_entries(list(itertools.islice(
- ie_entries, 0, max(playlistitems))))
- else:
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
- n_entries = len(entries)
- report_download(n_entries)
-
- if self.params.get('playlistreverse', False):
- entries = entries[::-1]
-
- if self.params.get('playlistrandom', False):
- random.shuffle(entries)
-
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
-
- for i, entry in enumerate(entries, 1):
- self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
- 'n_entries': n_entries,
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_uploader': ie_result.get('uploader'),
- 'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': i + playliststart,
- 'extractor': ie_result['extractor'],
- 'webpage_url': ie_result['webpage_url'],
- 'webpage_url_basename': url_basename(ie_result['webpage_url']),
- 'extractor_key': ie_result['extractor_key'],
- }
-
- reason = self._match_entry(entry, incomplete=True)
- if reason is not None:
- self.to_screen('[download] ' + reason)
- continue
-
- entry_result = self.process_ie_result(entry,
- download=download,
- extra_info=extra)
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
- self.to_screen('[download] Finished downloading playlist: %s' % playlist)
- return ie_result
- elif result_type == 'compat_list':
- self.report_warning(
- 'Extractor %s returned a compat_list result. '
- 'It needs to be updated.' % ie_result.get('extractor'))
-
- def _fixup(r):
- self.add_extra_info(
- r,
- {
- 'extractor': ie_result['extractor'],
- 'webpage_url': ie_result['webpage_url'],
- 'webpage_url_basename': url_basename(ie_result['webpage_url']),
- 'extractor_key': ie_result['extractor_key'],
- }
- )
- return r
- ie_result['entries'] = [
- self.process_ie_result(_fixup(r), download, extra_info)
- for r in ie_result['entries']
- ]
- return ie_result
- else:
- raise Exception('Invalid result type: %s' % result_type)
-
- def _build_format_filter(self, filter_spec):
- " Returns a function to filter the formats according to the filter_spec "
-
- OPERATORS = {
- '<': operator.lt,
- '<=': operator.le,
- '>': operator.gt,
- '>=': operator.ge,
- '=': operator.eq,
- '!=': operator.ne,
- }
- operator_rex = re.compile(r'''(?x)\s*
- (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
- (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
- $
- ''' % '|'.join(map(re.escape, OPERATORS.keys())))
- m = operator_rex.search(filter_spec)
- if m:
- try:
- comparison_value = int(m.group('value'))
- except ValueError:
- comparison_value = parse_filesize(m.group('value'))
- if comparison_value is None:
- comparison_value = parse_filesize(m.group('value') + 'B')
- if comparison_value is None:
- raise ValueError(
- 'Invalid value %r in format specification %r' % (
- m.group('value'), filter_spec))
- op = OPERATORS[m.group('op')]
-
- if not m:
- STR_OPERATORS = {
- '=': operator.eq,
- '^=': lambda attr, value: attr.startswith(value),
- '$=': lambda attr, value: attr.endswith(value),
- '*=': lambda attr, value: value in attr,
- }
- str_operator_rex = re.compile(r'''(?x)
- \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
- \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
- \s*(?P<value>[a-zA-Z0-9._-]+)
- \s*$
- ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
- m = str_operator_rex.search(filter_spec)
- if m:
- comparison_value = m.group('value')
- str_op = STR_OPERATORS[m.group('op')]
- if m.group('negation'):
- op = lambda attr, value: not str_op(attr, value)
- else:
- op = str_op
-
- if not m:
- raise ValueError('Invalid filter specification %r' % filter_spec)
-
- def _filter(f):
- actual_value = f.get(m.group('key'))
- if actual_value is None:
- return m.group('none_inclusive')
- return op(actual_value, comparison_value)
- return _filter
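- # A quick sketch of the resulting predicate:
- #     f = self._build_format_filter('height<=480')
- # keeps {'height': 360}, rejects {'height': 720}, and also rejects
- # formats without a height unless the spec ends in '?' (e.g.
- # 'height<=480?').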
-
- def _default_format_spec(self, info_dict, download=True):
-
- def can_merge():
- merger = FFmpegMergerPP(self)
- return merger.available and merger.can_merge()
-
- def prefer_best():
- if self.params.get('simulate', False):
- return False
- if not download:
- return False
- if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
- return True
- if info_dict.get('is_live'):
- return True
- if not can_merge():
- return True
- return False
-
- req_format_list = ['bestvideo+bestaudio', 'best']
- if prefer_best():
- req_format_list.reverse()
- return '/'.join(req_format_list)
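- # E.g. when streaming to stdout (outtmpl '-') no merge is possible, so
- # prefer_best() holds and this returns 'best/bestvideo+bestaudio';
- # otherwise the default is 'bestvideo+bestaudio/best'.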
-
- def build_format_selector(self, format_spec):
- def syntax_error(note, start):
- message = (
- 'Invalid format specification: '
- '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
- return SyntaxError(message)
-
- PICKFIRST = 'PICKFIRST'
- MERGE = 'MERGE'
- SINGLE = 'SINGLE'
- GROUP = 'GROUP'
- FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
-
- def _parse_filter(tokens):
- filter_parts = []
- for type, string, start, _, _ in tokens:
- if type == tokenize.OP and string == ']':
- return ''.join(filter_parts)
- else:
- filter_parts.append(string)
-
- def _remove_unused_ops(tokens):
- # Remove operators that we don't use and join them with the surrounding strings
- # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
- ALLOWED_OPS = ('/', '+', ',', '(', ')')
- last_string, last_start, last_end, last_line = None, None, None, None
- for type, string, start, end, line in tokens:
- if type == tokenize.OP and string == '[':
- if last_string:
- yield tokenize.NAME, last_string, last_start, last_end, last_line
- last_string = None
- yield type, string, start, end, line
- # everything inside brackets will be handled by _parse_filter
- for type, string, start, end, line in tokens:
- yield type, string, start, end, line
- if type == tokenize.OP and string == ']':
- break
- elif type == tokenize.OP and string in ALLOWED_OPS:
- if last_string:
- yield tokenize.NAME, last_string, last_start, last_end, last_line
- last_string = None
- yield type, string, start, end, line
- elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
- if not last_string:
- last_string = string
- last_start = start
- last_end = end
- else:
- last_string += string
- if last_string:
- yield tokenize.NAME, last_string, last_start, last_end, last_line
-
- def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
- selectors = []
- current_selector = None
- for type, string, start, _, _ in tokens:
- # ENCODING is only defined in python 3.x
- if type == getattr(tokenize, 'ENCODING', None):
- continue
- elif type in [tokenize.NAME, tokenize.NUMBER]:
- current_selector = FormatSelector(SINGLE, string, [])
- elif type == tokenize.OP:
- if string == ')':
- if not inside_group:
- # ')' will be handled by the parentheses group
- tokens.restore_last_token()
- break
- elif inside_merge and string in ['/', ',']:
- tokens.restore_last_token()
- break
- elif inside_choice and string == ',':
- tokens.restore_last_token()
- break
- elif string == ',':
- if not current_selector:
- raise syntax_error('"," must follow a format selector', start)
- selectors.append(current_selector)
- current_selector = None
- elif string == '/':
- if not current_selector:
- raise syntax_error('"/" must follow a format selector', start)
- first_choice = current_selector
- second_choice = _parse_format_selection(tokens, inside_choice=True)
- current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
- elif string == '[':
- if not current_selector:
- current_selector = FormatSelector(SINGLE, 'best', [])
- format_filter = _parse_filter(tokens)
- current_selector.filters.append(format_filter)
- elif string == '(':
- if current_selector:
- raise syntax_error('Unexpected "("', start)
- group = _parse_format_selection(tokens, inside_group=True)
- current_selector = FormatSelector(GROUP, group, [])
- elif string == '+':
- video_selector = current_selector
- audio_selector = _parse_format_selection(tokens, inside_merge=True)
- if not video_selector or not audio_selector:
- raise syntax_error('"+" must be between two format selectors', start)
- current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
- else:
- raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
- elif type == tokenize.ENDMARKER:
- break
- if current_selector:
- selectors.append(current_selector)
- return selectors
-
- def _build_selector_function(selector):
- if isinstance(selector, list):
- fs = [_build_selector_function(s) for s in selector]
-
- def selector_function(ctx):
- for f in fs:
- for format in f(ctx):
- yield format
- return selector_function
- elif selector.type == GROUP:
- selector_function = _build_selector_function(selector.selector)
- elif selector.type == PICKFIRST:
- fs = [_build_selector_function(s) for s in selector.selector]
-
- def selector_function(ctx):
- for f in fs:
- picked_formats = list(f(ctx))
- if picked_formats:
- return picked_formats
- return []
- elif selector.type == SINGLE:
- format_spec = selector.selector
-
- def selector_function(ctx):
- formats = list(ctx['formats'])
- if not formats:
- return
- if format_spec == 'all':
- for f in formats:
- yield f
- elif format_spec in ['best', 'worst', None]:
- format_idx = 0 if format_spec == 'worst' else -1
- audiovideo_formats = [
- f for f in formats
- if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
- if audiovideo_formats:
- yield audiovideo_formats[format_idx]
- # for extractors with incomplete formats (audio only (soundcloud)
- # or video only (imgur)) we will fall back to the best/worst
- # {video,audio}-only format
- elif ctx['incomplete_formats']:
- yield formats[format_idx]
- elif format_spec == 'bestaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[-1]
- elif format_spec == 'worstaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[0]
- elif format_spec == 'bestvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[-1]
- elif format_spec == 'worstvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[0]
- else:
- extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
- if format_spec in extensions:
- filter_f = lambda f: f['ext'] == format_spec
- else:
- filter_f = lambda f: f['format_id'] == format_spec
- matches = list(filter(filter_f, formats))
- if matches:
- yield matches[-1]
- elif selector.type == MERGE:
- def _merge(formats_info):
- format_1, format_2 = [f['format_id'] for f in formats_info]
- # The first format must contain the video and the
- # second the audio
- if formats_info[0].get('vcodec') == 'none':
- self.report_error('The first format must '
- 'contain the video, try using '
- '"-f %s+%s"' % (format_2, format_1))
- return
- # Formats must not both be video-only (the merge needs video+audio)
- if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
- self.report_error(
- 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
- % (format_1, format_2))
- return
- output_ext = (
- formats_info[0]['ext']
- if self.params.get('merge_output_format') is None
- else self.params['merge_output_format'])
- return {
- 'requested_formats': formats_info,
- 'format': '%s+%s' % (formats_info[0].get('format'),
- formats_info[1].get('format')),
- 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
- formats_info[1].get('format_id')),
- 'width': formats_info[0].get('width'),
- 'height': formats_info[0].get('height'),
- 'resolution': formats_info[0].get('resolution'),
- 'fps': formats_info[0].get('fps'),
- 'vcodec': formats_info[0].get('vcodec'),
- 'vbr': formats_info[0].get('vbr'),
- 'stretched_ratio': formats_info[0].get('stretched_ratio'),
- 'acodec': formats_info[1].get('acodec'),
- 'abr': formats_info[1].get('abr'),
- 'ext': output_ext,
- }
- video_selector, audio_selector = map(_build_selector_function, selector.selector)
-
- def selector_function(ctx):
- for pair in itertools.product(
- video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
- yield _merge(pair)
-
- filters = [self._build_format_filter(f) for f in selector.filters]
-
- def final_selector(ctx):
- ctx_copy = copy.deepcopy(ctx)
- for _filter in filters:
- ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
- return selector_function(ctx_copy)
- return final_selector
-
- stream = io.BytesIO(format_spec.encode('utf-8'))
- try:
- tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
- except tokenize.TokenError:
- raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
-
- class TokenIterator(object):
- def __init__(self, tokens):
- self.tokens = tokens
- self.counter = 0
-
- def __iter__(self):
- return self
-
- def __next__(self):
- if self.counter >= len(self.tokens):
- raise StopIteration()
- value = self.tokens[self.counter]
- self.counter += 1
- return value
-
- next = __next__
-
- def restore_last_token(self):
- self.counter -= 1
-
- parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
- return _build_selector_function(parsed_selector)
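- # As an illustration, 'bestvideo[height<=720]+bestaudio/best' parses to
- #     PICKFIRST(MERGE(SINGLE('bestvideo') with a height<=720 filter,
- #                     SINGLE('bestaudio')),
- #               SINGLE('best'))
- # and the built function yields the first alternative that produces
- # any formats.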
-
- def _calc_headers(self, info_dict):
- res = std_headers.copy()
-
- add_headers = info_dict.get('http_headers')
- if add_headers:
- res.update(add_headers)
-
- cookies = self._calc_cookies(info_dict)
- if cookies:
- res['Cookie'] = cookies
-
- if 'X-Forwarded-For' not in res:
- x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
- if x_forwarded_for_ip:
- res['X-Forwarded-For'] = x_forwarded_for_ip
-
- return res
-
- def _calc_cookies(self, info_dict):
- pr = sanitized_Request(info_dict['url'])
- self.cookiejar.add_cookie_header(pr)
- return pr.get_header('Cookie')
-
- def process_video_result(self, info_dict, download=True):
- assert info_dict.get('_type', 'video') == 'video'
-
- if 'id' not in info_dict:
- raise ExtractorError('Missing "id" field in extractor result')
- if 'title' not in info_dict:
- raise ExtractorError('Missing "title" field in extractor result')
-
- def report_force_conversion(field, field_not, conversion):
- self.report_warning(
- '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
- % (field, field_not, conversion))
-
- def sanitize_string_field(info, string_field):
- field = info.get(string_field)
- if field is None or isinstance(field, compat_str):
- return
- report_force_conversion(string_field, 'a string', 'string')
- info[string_field] = compat_str(field)
-
- def sanitize_numeric_fields(info):
- for numeric_field in self._NUMERIC_FIELDS:
- field = info.get(numeric_field)
- if field is None or isinstance(field, compat_numeric_types):
- continue
- report_force_conversion(numeric_field, 'numeric', 'int')
- info[numeric_field] = int_or_none(field)
-
- sanitize_string_field(info_dict, 'id')
- sanitize_numeric_fields(info_dict)
-
- if 'playlist' not in info_dict:
- # It isn't part of a playlist
- info_dict['playlist'] = None
- info_dict['playlist_index'] = None
-
- thumbnails = info_dict.get('thumbnails')
- if thumbnails is None:
- thumbnail = info_dict.get('thumbnail')
- if thumbnail:
- info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
- if thumbnails:
- thumbnails.sort(key=lambda t: (
- t.get('preference') if t.get('preference') is not None else -1,
- t.get('width') if t.get('width') is not None else -1,
- t.get('height') if t.get('height') is not None else -1,
- t.get('id') if t.get('id') is not None else '', t.get('url')))
- for i, t in enumerate(thumbnails):
- t['url'] = sanitize_url(t['url'])
- if t.get('width') and t.get('height'):
- t['resolution'] = '%dx%d' % (t['width'], t['height'])
- if t.get('id') is None:
- t['id'] = '%d' % i
-
- if self.params.get('list_thumbnails'):
- self.list_thumbnails(info_dict)
- return
-
- thumbnail = info_dict.get('thumbnail')
- if thumbnail:
- info_dict['thumbnail'] = sanitize_url(thumbnail)
- elif thumbnails:
- info_dict['thumbnail'] = thumbnails[-1]['url']
-
- if 'display_id' not in info_dict and 'id' in info_dict:
- info_dict['display_id'] = info_dict['id']
-
- if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
- # Working around out-of-range timestamp values (e.g. negative ones on Windows,
- # see http://bugs.python.org/issue1646728)
- try:
- upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
- info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
- except (ValueError, OverflowError, OSError):
- pass
-
- # Auto generate title fields corresponding to the *_number fields when missing
- # in order to always have clean titles. This is very common for TV series.
- for field in ('chapter', 'season', 'episode'):
- if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
- info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
-
- for cc_kind in ('subtitles', 'automatic_captions'):
- cc = info_dict.get(cc_kind)
- if cc:
- for _, subtitle in cc.items():
- for subtitle_format in subtitle:
- if subtitle_format.get('url'):
- subtitle_format['url'] = sanitize_url(subtitle_format['url'])
- if subtitle_format.get('ext') is None:
- subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
-
- automatic_captions = info_dict.get('automatic_captions')
- subtitles = info_dict.get('subtitles')
-
- if self.params.get('listsubtitles', False):
- if 'automatic_captions' in info_dict:
- self.list_subtitles(
- info_dict['id'], automatic_captions, 'automatic captions')
- self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
- return
-
- info_dict['requested_subtitles'] = self.process_subtitles(
- info_dict['id'], subtitles, automatic_captions)
-
- # We now pick which formats have to be downloaded
- if info_dict.get('formats') is None:
- # There's only one format available
- formats = [info_dict]
- else:
- formats = info_dict['formats']
-
- if not formats:
- raise ExtractorError('No video formats found!')
-
- def is_wellformed(f):
- url = f.get('url')
- if not url:
- self.report_warning(
- '"url" field is missing or empty - skipping format, '
- 'there is an error in extractor')
- return False
- if isinstance(url, bytes):
- sanitize_string_field(f, 'url')
- return True
-
- # Filter out malformed formats for better extraction robustness
- formats = list(filter(is_wellformed, formats))
-
- formats_dict = {}
-
- # We check that all the formats have the format and format_id fields
- for i, format in enumerate(formats):
- sanitize_string_field(format, 'format_id')
- sanitize_numeric_fields(format)
- format['url'] = sanitize_url(format['url'])
- if not format.get('format_id'):
- format['format_id'] = compat_str(i)
- else:
- # Sanitize format_id from characters used in format selector expression
- format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
- format_id = format['format_id']
- if format_id not in formats_dict:
- formats_dict[format_id] = []
- formats_dict[format_id].append(format)
-
- # Make sure all formats have unique format_id
- for format_id, ambiguous_formats in formats_dict.items():
- if len(ambiguous_formats) > 1:
- for i, format in enumerate(ambiguous_formats):
- format['format_id'] = '%s-%d' % (format_id, i)
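- # E.g. two distinct formats that both report format_id 'hls' are
- # renamed to 'hls-0' and 'hls-1' so format selection stays unambiguous.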
-
- for i, format in enumerate(formats):
- if format.get('format') is None:
- format['format'] = '{id} - {res}{note}'.format(
- id=format['format_id'],
- res=self.format_resolution(format),
- note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
- )
- # Automatically determine file extension if missing
- if format.get('ext') is None:
- format['ext'] = determine_ext(format['url']).lower()
- # Automatically determine protocol if missing (useful for format
- # selection purposes)
- if format.get('protocol') is None:
- format['protocol'] = determine_protocol(format)
- # Add HTTP headers, so that external programs can use them from the
- # json output
- full_format_info = info_dict.copy()
- full_format_info.update(format)
- format['http_headers'] = self._calc_headers(full_format_info)
- # Remove private housekeeping stuff
- if '__x_forwarded_for_ip' in info_dict:
- del info_dict['__x_forwarded_for_ip']
-
- # TODO Central sorting goes here
-
- if formats[0] is not info_dict:
- # only set the 'formats' field if the original info_dict lists them;
- # otherwise we end up with a circular reference: the first (and only)
- # element in the 'formats' field in info_dict is info_dict itself,
- # which can't be exported to JSON
- info_dict['formats'] = formats
- if self.params.get('listformats'):
- self.list_formats(info_dict)
- return
-
- req_format = self.params.get('format')
- if req_format is None:
- req_format = self._default_format_spec(info_dict, download=download)
- if self.params.get('verbose'):
- self.to_stdout('[debug] Default format spec: %s' % req_format)
-
- format_selector = self.build_format_selector(req_format)
-
- # While in format selection we may need to have access to the original
- # format set in order to calculate some metrics or do some processing.
- # For now we need to be able to guess whether original formats provided
- # by extractor are incomplete or not (i.e. whether extractor provides only
- # video-only or audio-only formats) for proper format selection for
- # extractors with such incomplete formats (see
- # https://github.com/ytdl-org/youtube-dl/pull/5556).
- # Since formats may be filtered during format selection and may not match
- # the original formats, the results may be incorrect. Thus original formats
- # or pre-calculated metrics should be passed to format selection routines
- # as well.
- # We will pass a context object containing all necessary additional data
- # instead of just formats.
- # This fixes an incorrect format selection issue (see
- # https://github.com/ytdl-org/youtube-dl/issues/10083).
- incomplete_formats = (
- # All formats are video-only or
- all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
- # all formats are audio-only
- or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
-
- ctx = {
- 'formats': formats,
- 'incomplete_formats': incomplete_formats,
- }
-
- formats_to_download = list(format_selector(ctx))
- if not formats_to_download:
- raise ExtractorError('requested format not available',
- expected=True)
-
- if download:
- if len(formats_to_download) > 1:
- self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
- for format in formats_to_download:
- new_info = dict(info_dict)
- new_info.update(format)
- self.process_info(new_info)
- # We update the info dict with the best quality format (backwards compatibility)
- info_dict.update(formats_to_download[-1])
- return info_dict
-
- def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
- """Select the requested subtitles and their format"""
- available_subs = {}
- if normal_subtitles and self.params.get('writesubtitles'):
- available_subs.update(normal_subtitles)
- if automatic_captions and self.params.get('writeautomaticsub'):
- for lang, cap_info in automatic_captions.items():
- if lang not in available_subs:
- available_subs[lang] = cap_info
-
- if (not self.params.get('writesubtitles') and not
- self.params.get('writeautomaticsub') or not
- available_subs):
- return None
-
- if self.params.get('allsubtitles', False):
- requested_langs = available_subs.keys()
- else:
- if self.params.get('subtitleslangs', False):
- requested_langs = self.params.get('subtitleslangs')
- elif 'en' in available_subs:
- requested_langs = ['en']
- else:
- requested_langs = [list(available_subs.keys())[0]]
-
- formats_query = self.params.get('subtitlesformat', 'best')
- formats_preference = formats_query.split('/') if formats_query else []
- subs = {}
- for lang in requested_langs:
- formats = available_subs.get(lang)
- if formats is None:
- self.report_warning('%s subtitles not available for %s' % (lang, video_id))
- continue
- for ext in formats_preference:
- if ext == 'best':
- f = formats[-1]
- break
- matches = list(filter(lambda f: f['ext'] == ext, formats))
- if matches:
- f = matches[-1]
- break
- else:
- f = formats[-1]
- self.report_warning(
- 'No subtitle format found matching "%s" for language %s, '
- 'using %s' % (formats_query, lang, f['ext']))
- subs[lang] = f
- return subs
-
- def __forced_printings(self, info_dict, filename, incomplete):
- def print_mandatory(field):
- if (self.params.get('force%s' % field, False)
- and (not incomplete or info_dict.get(field) is not None)):
- self.to_stdout(info_dict[field])
-
- def print_optional(field):
- if (self.params.get('force%s' % field, False)
- and info_dict.get(field) is not None):
- self.to_stdout(info_dict[field])
-
- print_mandatory('title')
- print_mandatory('id')
- if self.params.get('forceurl', False) and not incomplete:
- if info_dict.get('requested_formats') is not None:
- for f in info_dict['requested_formats']:
- self.to_stdout(f['url'] + f.get('play_path', ''))
- else:
- # For RTMP URLs, also include the playpath
- self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
- print_optional('thumbnail')
- print_optional('description')
- if self.params.get('forcefilename', False) and filename is not None:
- self.to_stdout(filename)
- if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
- self.to_stdout(formatSeconds(info_dict['duration']))
- print_mandatory('format')
- if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(info_dict))
-
- def process_info(self, info_dict):
- """Process a single resolved IE result."""
-
- assert info_dict.get('_type', 'video') == 'video'
-
- max_downloads = self.params.get('max_downloads')
- if max_downloads is not None:
- if self._num_downloads >= int(max_downloads):
- raise MaxDownloadsReached()
-
- # TODO: backward compatibility, to be removed
- info_dict['fulltitle'] = info_dict['title']
-
- if 'format' not in info_dict:
- info_dict['format'] = info_dict['ext']
-
- reason = self._match_entry(info_dict, incomplete=False)
- if reason is not None:
- self.to_screen('[download] ' + reason)
- return
-
- self._num_downloads += 1
-
- info_dict['_filename'] = filename = self.prepare_filename(info_dict)
-
- # Forced printings
- self.__forced_printings(info_dict, filename, incomplete=False)
-
- # Do nothing else if in simulate mode
- if self.params.get('simulate', False):
- return
-
- if filename is None:
- return
-
- def ensure_dir_exists(path):
- try:
- dn = os.path.dirname(path)
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- return True
- except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + error_to_compat_str(err))
- return False
-
- if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
- return
-
- if self.params.get('writedescription', False):
- descfn = replace_extension(filename, 'description', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
- self.to_screen('[info] Video description is already present')
- elif info_dict.get('description') is None:
- self.report_warning('There\'s no description to write.')
- else:
- try:
- self.to_screen('[info] Writing video description to: ' + descfn)
- with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
- descfile.write(info_dict['description'])
- except (OSError, IOError):
- self.report_error('Cannot write description file ' + descfn)
- return
-
- if self.params.get('writeannotations', False):
- annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
- self.to_screen('[info] Video annotations are already present')
- elif not info_dict.get('annotations'):
- self.report_warning('There are no annotations to write.')
- else:
- try:
- self.to_screen('[info] Writing video annotations to: ' + annofn)
- with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
- annofile.write(info_dict['annotations'])
- except (KeyError, TypeError):
- self.report_warning('There are no annotations to write.')
- except (OSError, IOError):
- self.report_error('Cannot write annotations file: ' + annofn)
- return
-
- subtitles_are_requested = any([self.params.get('writesubtitles', False),
- self.params.get('writeautomaticsub')])
-
- if subtitles_are_requested and info_dict.get('requested_subtitles'):
- # Subtitle download errors are already managed as troubles in the
- # relevant IE; that way it will silently go on when used with an
- # unsupporting IE
- subtitles = info_dict['requested_subtitles']
- ie = self.get_info_extractor(info_dict['extractor_key'])
- for sub_lang, sub_info in subtitles.items():
- sub_format = sub_info['ext']
- sub_filename = subtitles_filename(filename, sub_lang, sub_format)
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
- self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
- else:
- self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
- if sub_info.get('data') is not None:
- try:
- # Use newline='' to prevent conversion of newline characters
- # See https://github.com/ytdl-org/youtube-dl/issues/10268
- with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
- subfile.write(sub_info['data'])
- except (OSError, IOError):
- self.report_error('Cannot write subtitles file ' + sub_filename)
- return
- else:
- try:
- sub_data = ie._request_webpage(
- sub_info['url'], info_dict['id'], note=False).read()
- with io.open(encodeFilename(sub_filename), 'wb') as subfile:
- subfile.write(sub_data)
- except (ExtractorError, IOError, OSError, ValueError) as err:
- self.report_warning('Unable to download subtitle for "%s": %s' %
- (sub_lang, error_to_compat_str(err)))
- continue
-
- if self.params.get('writeinfojson', False):
- infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
- self.to_screen('[info] Video description metadata is already present')
- else:
- self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
- try:
- write_json_file(self.filter_requested_info(info_dict), infofn)
- except (OSError, IOError):
- self.report_error('Cannot write metadata to JSON file ' + infofn)
- return
-
- self._write_thumbnails(info_dict, filename)
-
- if not self.params.get('skip_download', False):
- try:
- def dl(name, info):
- fd = get_suitable_downloader(info, self.params)(self, self.params)
- for ph in self._progress_hooks:
- fd.add_progress_hook(ph)
- if self.params.get('verbose'):
- self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
-
- if info_dict.get('requested_formats') is not None:
- downloaded = []
- success = True
- merger = FFmpegMergerPP(self)
- if not merger.available:
- postprocessors = []
- self.report_warning('You have requested multiple '
- 'formats but ffmpeg or avconv are not installed.'
- ' The formats won\'t be merged.')
- else:
- postprocessors = [merger]
-
- def compatible_formats(formats):
- video, audio = formats
- # Check extension
- video_ext, audio_ext = video.get('ext'), audio.get('ext')
- if video_ext and audio_ext:
- COMPATIBLE_EXTS = (
- ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
- ('webm',)  # note the comma: ('webm') would be a plain string, not a tuple
- )
- for exts in COMPATIBLE_EXTS:
- if video_ext in exts and audio_ext in exts:
- return True
- # TODO: Check acodec/vcodec
- return False
-
- filename_real_ext = os.path.splitext(filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(filename)[0]
- if filename_real_ext == info_dict['ext']
- else filename)
- requested_formats = info_dict['requested_formats']
- if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
- info_dict['ext'] = 'mkv'
- self.report_warning(
- 'Requested formats are incompatible for merge and will be merged into mkv.')
- # Ensure filename always has a correct extension for successful merge
- filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
- if os.path.exists(encodeFilename(filename)):
- self.to_screen(
- '[download] %s has already been downloaded and '
- 'merged' % filename)
- else:
- for f in requested_formats:
- new_info = dict(info_dict)
- new_info.update(f)
- fname = prepend_extension(
- self.prepare_filename(new_info),
- 'f%s' % f['format_id'], new_info['ext'])
- if not ensure_dir_exists(fname):
- return
- downloaded.append(fname)
- partial_success = dl(fname, new_info)
- success = success and partial_success
- info_dict['__postprocessors'] = postprocessors
- info_dict['__files_to_merge'] = downloaded
- else:
- # Just a single file
- success = dl(filename, info_dict)
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self.report_error('unable to download video data: %s' % error_to_compat_str(err))
- return
- except (OSError, IOError) as err:
- raise UnavailableVideoError(err)
- except (ContentTooShortError, ) as err:
- self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
- return
-
- if success and filename != '-':
- # Fixup content
- fixup_policy = self.params.get('fixup')
- if fixup_policy is None:
- fixup_policy = 'detect_or_warn'
-
- INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
-
- stretched_ratio = info_dict.get('stretched_ratio')
- if stretched_ratio is not None and stretched_ratio != 1:
- if fixup_policy == 'warn':
- self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
- info_dict['id'], stretched_ratio))
- elif fixup_policy == 'detect_or_warn':
- stretched_pp = FFmpegFixupStretchedPP(self)
- if stretched_pp.available:
- info_dict.setdefault('__postprocessors', [])
- info_dict['__postprocessors'].append(stretched_pp)
- else:
- self.report_warning(
- '%s: Non-uniform pixel ratio (%s). %s'
- % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
- else:
- assert fixup_policy in ('ignore', 'never')
-
- if (info_dict.get('requested_formats') is None
- and info_dict.get('container') == 'm4a_dash'):
- if fixup_policy == 'warn':
- self.report_warning(
- '%s: writing DASH m4a. '
- 'Only some players support this container.'
- % info_dict['id'])
- elif fixup_policy == 'detect_or_warn':
- fixup_pp = FFmpegFixupM4aPP(self)
- if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
- info_dict['__postprocessors'].append(fixup_pp)
- else:
- self.report_warning(
- '%s: writing DASH m4a. '
- 'Only some players support this container. %s'
- % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
- else:
- assert fixup_policy in ('ignore', 'never')
-
- if (info_dict.get('protocol') == 'm3u8_native'
- or (info_dict.get('protocol') == 'm3u8'
- and self.params.get('hls_prefer_native'))):
- if fixup_policy == 'warn':
- self.report_warning('%s: malformed AAC bitstream detected.' % (
- info_dict['id']))
- elif fixup_policy == 'detect_or_warn':
- fixup_pp = FFmpegFixupM3u8PP(self)
- if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
- info_dict['__postprocessors'].append(fixup_pp)
- else:
- self.report_warning(
- '%s: malformed AAC bitstream detected. %s'
- % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
- else:
- assert fixup_policy in ('ignore', 'never')
-
- try:
- self.post_process(filename, info_dict)
- except (PostProcessingError) as err:
- self.report_error('postprocessing: %s' % str(err))
- return
- self.record_download_archive(info_dict)
-
- def download(self, url_list):
- """Download a given list of URLs."""
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
- if (len(url_list) > 1
- and outtmpl != '-'
- and '%' not in outtmpl
- and self.params.get('max_downloads') != 1):
- raise SameFileError(outtmpl)
-
- for url in url_list:
- try:
- # It also downloads the videos
- res = self.extract_info(
- url, force_generic_extractor=self.params.get('force_generic_extractor', False))
- except UnavailableVideoError:
- self.report_error('unable to download video')
- except MaxDownloadsReached:
- self.to_screen('[info] Maximum number of downloaded files reached.')
- raise
- else:
- if self.params.get('dump_single_json', False):
- self.to_stdout(json.dumps(res))
-
- return self._download_retcode
-
- def download_with_info_file(self, info_filename):
- with contextlib.closing(fileinput.FileInput(
- [info_filename], mode='r',
- openhook=fileinput.hook_encoded('utf-8'))) as f:
- # FileInput doesn't have a read method, so we can't call json.load
- info = self.filter_requested_info(json.loads('\n'.join(f)))
- try:
- self.process_ie_result(info, download=True)
- except DownloadError:
- webpage_url = info.get('webpage_url')
- if webpage_url is not None:
- self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
- return self.download([webpage_url])
- else:
- raise
- return self._download_retcode
-
- @staticmethod
- def filter_requested_info(info_dict):
- return dict(
- (k, v) for k, v in info_dict.items()
- if k not in ['requested_formats', 'requested_subtitles'])
-
- def post_process(self, filename, ie_info):
- """Run all the postprocessors on the given file."""
- info = dict(ie_info)
- info['filepath'] = filename
- pps_chain = []
- if ie_info.get('__postprocessors') is not None:
- pps_chain.extend(ie_info['__postprocessors'])
- pps_chain.extend(self._pps)
- for pp in pps_chain:
- files_to_delete = []
- try:
- files_to_delete, info = pp.run(info)
- except PostProcessingError as e:
- self.report_error(e.msg)
- if files_to_delete and not self.params.get('keepvideo', False):
- for old_filename in files_to_delete:
- self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- try:
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded original file')
-
- def _make_archive_id(self, info_dict):
- video_id = info_dict.get('id')
- if not video_id:
- return
- # Future-proof against any change in case
- # and keep backwards compatibility with prior versions
- extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
- if extractor is None:
- url = str_or_none(info_dict.get('url'))
- if not url:
- return
- # Try to find matching extractor for the URL and take its ie_key
- for ie in self._ies:
- if ie.suitable(url):
- extractor = ie.ie_key()
- break
- else:
- return
- return extractor.lower() + ' ' + video_id
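- # E.g. a video with id 'dQw4w9WgXcQ' extracted by YoutubeIE yields the
- # archive id 'youtube dQw4w9WgXcQ', which is the exact line format
- # stored in the --download-archive file.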
-
- def in_download_archive(self, info_dict):
- fn = self.params.get('download_archive')
- if fn is None:
- return False
-
- vid_id = self._make_archive_id(info_dict)
- if not vid_id:
- return False # Incomplete video information
-
- try:
- with locked_file(fn, 'r', encoding='utf-8') as archive_file:
- for line in archive_file:
- if line.strip() == vid_id:
- return True
- except IOError as ioe:
- if ioe.errno != errno.ENOENT:
- raise
- return False
-
- def record_download_archive(self, info_dict):
- fn = self.params.get('download_archive')
- if fn is None:
- return
- vid_id = self._make_archive_id(info_dict)
- assert vid_id
- with locked_file(fn, 'a', encoding='utf-8') as archive_file:
- archive_file.write(vid_id + '\n')
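The download archive is a plain text file holding one '<extractor_key.lower()> <video_id>' entry per line, exactly what _make_archive_id produces. A minimal standalone sketch of the same membership check (the file name here is hypothetical):

    def in_archive(archive_path, extractor_key, video_id):
        # Mirrors _make_archive_id + in_download_archive.
        archive_id = extractor_key.lower() + ' ' + video_id
        try:
            with open(archive_path, 'r') as f:
                return any(line.strip() == archive_id for line in f)
        except IOError:  # archive file not created yet
            return False

    print(in_archive('archive.txt', 'Youtube', 'BaW_jenozKc'))  # False until recorded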
-
- @staticmethod
- def format_resolution(format, default='unknown'):
- if format.get('vcodec') == 'none':
- return 'audio only'
- if format.get('resolution') is not None:
- return format['resolution']
- if format.get('height') is not None:
- if format.get('width') is not None:
- res = '%sx%s' % (format['width'], format['height'])
- else:
- res = '%sp' % format['height']
- elif format.get('width') is not None:
- res = '%dx?' % format['width']
- else:
- res = default
- return res
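format_resolution is a staticmethod, so each branch can be exercised directly; a few illustrative inputs:

    from youtube_dl import YoutubeDL

    fr = YoutubeDL.format_resolution
    print(fr({'vcodec': 'none'}))              # 'audio only'
    print(fr({'width': 1280, 'height': 720}))  # '1280x720'
    print(fr({'height': 720}))                 # '720p'
    print(fr({'width': 1280}))                 # '1280x?'
    print(fr({}))                              # 'unknown'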
-
- def _format_note(self, fdict):
- res = ''
- if fdict.get('ext') in ['f4f', 'f4m']:
- res += '(unsupported) '
- if fdict.get('language'):
- if res:
- res += ' '
- res += '[%s] ' % fdict['language']
- if fdict.get('format_note') is not None:
- res += fdict['format_note'] + ' '
- if fdict.get('tbr') is not None:
- res += '%4dk ' % fdict['tbr']
- if fdict.get('container') is not None:
- if res:
- res += ', '
- res += '%s container' % fdict['container']
- if (fdict.get('vcodec') is not None
- and fdict.get('vcodec') != 'none'):
- if res:
- res += ', '
- res += fdict['vcodec']
- if fdict.get('vbr') is not None:
- res += '@'
- elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
- res += 'video@'
- if fdict.get('vbr') is not None:
- res += '%4dk' % fdict['vbr']
- if fdict.get('fps') is not None:
- if res:
- res += ', '
- res += '%sfps' % fdict['fps']
- if fdict.get('acodec') is not None:
- if res:
- res += ', '
- if fdict['acodec'] == 'none':
- res += 'video only'
- else:
- res += '%-5s' % fdict['acodec']
- elif fdict.get('abr') is not None:
- if res:
- res += ', '
- res += 'audio'
- if fdict.get('abr') is not None:
- res += '@%3dk' % fdict['abr']
- if fdict.get('asr') is not None:
- res += ' (%5dHz)' % fdict['asr']
- if fdict.get('filesize') is not None:
- if res:
- res += ', '
- res += format_bytes(fdict['filesize'])
- elif fdict.get('filesize_approx') is not None:
- if res:
- res += ', '
- res += '~' + format_bytes(fdict['filesize_approx'])
- return res
-
- def list_formats(self, info_dict):
- formats = info_dict.get('formats', [info_dict])
- table = [
- [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
- for f in formats
- if f.get('preference') is None or f['preference'] >= -1000]
- if len(formats) > 1:
- table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
-
- header_line = ['format code', 'extension', 'resolution', 'note']
- self.to_screen(
- '[info] Available formats for %s:\n%s' %
- (info_dict['id'], render_table(header_line, table)))
-
- def list_thumbnails(self, info_dict):
- thumbnails = info_dict.get('thumbnails')
- if not thumbnails:
- self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
- return
-
- self.to_screen(
- '[info] Thumbnails for %s:' % info_dict['id'])
- self.to_screen(render_table(
- ['ID', 'width', 'height', 'URL'],
- [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
-
- def list_subtitles(self, video_id, subtitles, name='subtitles'):
- if not subtitles:
- self.to_screen('%s has no %s' % (video_id, name))
- return
- self.to_screen(
- 'Available %s for %s:' % (name, video_id))
- self.to_screen(render_table(
- ['Language', 'formats'],
- [[lang, ', '.join(f['ext'] for f in reversed(formats))]
- for lang, formats in subtitles.items()]))
-
- def urlopen(self, req):
- """ Start an HTTP download """
- if isinstance(req, compat_basestring):
- req = sanitized_Request(req)
- return self._opener.open(req, timeout=self._socket_timeout)
-
- def print_debug_header(self):
- if not self.params.get('verbose'):
- return
-
- if type('') is not compat_str:
- # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
- self.report_warning(
- 'Your Python is broken! Update to a newer and supported version')
-
- stdout_encoding = getattr(
- sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
- encoding_str = (
- '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
- locale.getpreferredencoding(),
- sys.getfilesystemencoding(),
- stdout_encoding,
- self.get_encoding()))
- write_string(encoding_str, encoding=None)
-
- self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
- if _LAZY_LOADER:
- self._write_string('[debug] Lazy loading extractors enabled\n')
- try:
- sp = subprocess.Popen(
- ['git', 'rev-parse', '--short', 'HEAD'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE,
- cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate()
- out = out.decode().strip()
- if re.match('[0-9a-f]+', out):
- self._write_string('[debug] Git HEAD: ' + out + '\n')
- except Exception:
- try:
- sys.exc_clear()
- except Exception:
- pass
-
- def python_implementation():
- impl_name = platform.python_implementation()
- if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
- return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
- return impl_name
-
- self._write_string('[debug] Python version %s (%s) - %s\n' % (
- platform.python_version(), python_implementation(),
- platform_name()))
-
- exe_versions = FFmpegPostProcessor.get_versions(self)
- exe_versions['rtmpdump'] = rtmpdump_version()
- exe_versions['phantomjs'] = PhantomJSwrapper._version()
- exe_str = ', '.join(
- '%s %s' % (exe, v)
- for exe, v in sorted(exe_versions.items())
- if v
- )
- if not exe_str:
- exe_str = 'none'
- self._write_string('[debug] exe versions: %s\n' % exe_str)
-
- proxy_map = {}
- for handler in self._opener.handlers:
- if hasattr(handler, 'proxies'):
- proxy_map.update(handler.proxies)
- self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
-
- if self.params.get('call_home', False):
- ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
- self._write_string('[debug] Public IP address: %s\n' % ipaddr)
- latest_version = self.urlopen(
- 'https://yt-dl.org/latest/version').read().decode('utf-8')
- if version_tuple(latest_version) > version_tuple(__version__):
- self.report_warning(
- 'You are using an outdated version (newest version: %s)! '
- 'See https://yt-dl.org/update if you need help updating.' %
- latest_version)
-
- def _setup_opener(self):
- timeout_val = self.params.get('socket_timeout')
- self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
-
- opts_cookiefile = self.params.get('cookiefile')
- opts_proxy = self.params.get('proxy')
-
- if opts_cookiefile is None:
- self.cookiejar = compat_cookiejar.CookieJar()
- else:
- opts_cookiefile = expand_path(opts_cookiefile)
- self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
- if os.access(opts_cookiefile, os.R_OK):
- self.cookiejar.load(ignore_discard=True, ignore_expires=True)
-
- cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
- if opts_proxy is not None:
- if opts_proxy == '':
- proxies = {}
- else:
- proxies = {'http': opts_proxy, 'https': opts_proxy}
- else:
- proxies = compat_urllib_request.getproxies()
- # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
- if 'http' in proxies and 'https' not in proxies:
- proxies['https'] = proxies['http']
- proxy_handler = PerRequestProxyHandler(proxies)
-
- debuglevel = 1 if self.params.get('debug_printtraffic') else 0
- https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
- ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
- data_handler = compat_urllib_request_DataHandler()
-
- # When passing our own FileHandler instance, build_opener won't add the
- # default FileHandler, which lets us disable the file protocol; it could
- # otherwise be used for malicious purposes (see
- # https://github.com/ytdl-org/youtube-dl/issues/8227)
- file_handler = compat_urllib_request.FileHandler()
-
- def file_open(*args, **kwargs):
- raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
- file_handler.file_open = file_open
-
- opener = compat_urllib_request.build_opener(
- proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
-
- # Delete the default user-agent header, which would otherwise apply in
- # cases where our custom HTTP handler doesn't come into play
- # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
- opener.addheaders = []
- self._opener = opener
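With file_open overridden as above, any file:// URL routed through the opener fails immediately; a small sketch (the local path is arbitrary):

    from youtube_dl import YoutubeDL
    from youtube_dl.compat import compat_urllib_error

    ydl = YoutubeDL({})
    try:
        ydl.urlopen('file:///etc/passwd')
    except compat_urllib_error.URLError as err:
        print(err.reason)  # 'file:// scheme is explicitly disabled ...'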
-
- def encode(self, s):
- if isinstance(s, bytes):
- return s # Already encoded
-
- try:
- return s.encode(self.get_encoding())
- except UnicodeEncodeError as err:
- err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
- raise
-
- def get_encoding(self):
- encoding = self.params.get('encoding')
- if encoding is None:
- encoding = preferredencoding()
- return encoding
-
- def _write_thumbnails(self, info_dict, filename):
- if self.params.get('writethumbnail', False):
- thumbnails = info_dict.get('thumbnails')
- if thumbnails:
- thumbnails = [thumbnails[-1]]  # thumbnails are sorted ascending, so the last is the best
- elif self.params.get('write_all_thumbnails', False):
- thumbnails = info_dict.get('thumbnails')
- else:
- return
-
- if not thumbnails:
- # No thumbnails present, so return immediately
- return
-
- for t in thumbnails:
- thumb_ext = determine_ext(t['url'], 'jpg')
- suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
- thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''  # trailing space feeds the '%sis'/'%sto' messages below
- t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
-
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
- self.to_screen('[%s] %s: Thumbnail %sis already present' %
- (info_dict['extractor'], info_dict['id'], thumb_display_id))
- else:
- self.to_screen('[%s] %s: Downloading thumbnail %s...' %
- (info_dict['extractor'], info_dict['id'], thumb_display_id))
- try:
- uf = self.urlopen(t['url'])
- with open(encodeFilename(thumb_filename), 'wb') as thumbf:
- shutil.copyfileobj(uf, thumbf)
- self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
- (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self.report_warning('Unable to download thumbnail "%s": %s' %
- (t['url'], error_to_compat_str(err)))
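The thumbnail filename is derived from the media filename: the extension is swapped for the thumbnail's, and an '_<id>' suffix is added only when several thumbnails are written. A sketch of the same construction:

    import os

    def thumb_filename(media_filename, thumb_ext, thumb_id, multiple):
        # Mirrors _write_thumbnails: 'video.mp4' -> 'video.jpg', or
        # 'video_0.jpg', 'video_1.jpg', ... with --write-all-thumbnails.
        suffix = '_%s' % thumb_id if multiple else ''
        return os.path.splitext(media_filename)[0] + suffix + '.' + thumb_ext

    print(thumb_filename('video.mp4', 'jpg', 0, False))  # video.jpg
    print(thumb_filename('video.mp4', 'jpg', 1, True))   # video_1.jpg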
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
deleted file mode 100644
index 9a659fc65..000000000
--- a/youtube_dl/__init__.py
+++ /dev/null
@@ -1,483 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-__license__ = 'Public Domain'
-
-import codecs
-import io
-import os
-import random
-import sys
-
-
-from .options import (
- parseOpts,
-)
-from .compat import (
- compat_getpass,
- compat_shlex_split,
- workaround_optparse_bug9161,
-)
-from .utils import (
- DateRange,
- decodeOption,
- DEFAULT_OUTTMPL,
- DownloadError,
- expand_path,
- match_filter_func,
- MaxDownloadsReached,
- preferredencoding,
- read_batch_urls,
- SameFileError,
- setproctitle,
- std_headers,
- write_string,
- render_table,
-)
-from .update import update_self
-from .downloader import (
- FileDownloader,
-)
-from .extractor import gen_extractors, list_extractors
-from .extractor.adobepass import MSO_INFO
-from .YoutubeDL import YoutubeDL
-
-
-def _real_main(argv=None):
- # Compatibility fixes for Windows
- if sys.platform == 'win32':
- # https://github.com/ytdl-org/youtube-dl/issues/820
- codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
-
- workaround_optparse_bug9161()
-
- setproctitle('youtube-dl')
-
- parser, opts, args = parseOpts(argv)
-
- # Set user agent
- if opts.user_agent is not None:
- std_headers['User-Agent'] = opts.user_agent
-
- # Set referer
- if opts.referer is not None:
- std_headers['Referer'] = opts.referer
-
- # Custom HTTP headers
- if opts.headers is not None:
- for h in opts.headers:
- if ':' not in h:
- parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
- key, value = h.split(':', 1)
- if opts.verbose:
- write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
- std_headers[key] = value
-
- # Dump user agent
- if opts.dump_user_agent:
- write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
- sys.exit(0)
-
- # Batch file verification
- batch_urls = []
- if opts.batchfile is not None:
- try:
- if opts.batchfile == '-':
- batchfd = sys.stdin
- else:
- batchfd = io.open(
- expand_path(opts.batchfile),
- 'r', encoding='utf-8', errors='ignore')
- batch_urls = read_batch_urls(batchfd)
- if opts.verbose:
- write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
- except IOError:
- sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
- all_urls = batch_urls + [url.strip() for url in args]  # batch_urls are already stripped in read_batch_urls
- _enc = preferredencoding()
- all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
-
- if opts.list_extractors:
- for ie in list_extractors(opts.age_limit):
- write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
- matchedUrls = [url for url in all_urls if ie.suitable(url)]
- for mu in matchedUrls:
- write_string(' ' + mu + '\n', out=sys.stdout)
- sys.exit(0)
- if opts.list_extractor_descriptions:
- for ie in list_extractors(opts.age_limit):
- if not ie._WORKING:
- continue
- desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
- if desc is False:
- continue
- if hasattr(ie, 'SEARCH_KEY'):
- _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
- _COUNTS = ('', '5', '10', 'all')
- desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
- write_string(desc + '\n', out=sys.stdout)
- sys.exit(0)
- if opts.ap_list_mso:
- table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
- write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
- sys.exit(0)
-
- # Conflicting, missing and erroneous options
- if opts.usenetrc and (opts.username is not None or opts.password is not None):
- parser.error('using .netrc conflicts with giving username/password')
- if opts.password is not None and opts.username is None:
- parser.error('account username missing\n')
- if opts.ap_password is not None and opts.ap_username is None:
- parser.error('TV Provider account username missing\n')
- if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
- parser.error('using output template conflicts with using title, video ID or auto number')
- if opts.autonumber_size is not None:
- if opts.autonumber_size <= 0:
- parser.error('auto number size must be positive')
- if opts.autonumber_start is not None:
- if opts.autonumber_start < 0:
- parser.error('auto number start must be positive or 0')
- if opts.usetitle and opts.useid:
- parser.error('using title conflicts with using video ID')
- if opts.username is not None and opts.password is None:
- opts.password = compat_getpass('Type account password and press [Return]: ')
- if opts.ap_username is not None and opts.ap_password is None:
- opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
- if opts.ratelimit is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
- if numeric_limit is None:
- parser.error('invalid rate limit specified')
- opts.ratelimit = numeric_limit
- if opts.min_filesize is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
- if numeric_limit is None:
- parser.error('invalid min_filesize specified')
- opts.min_filesize = numeric_limit
- if opts.max_filesize is not None:
- numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
- if numeric_limit is None:
- parser.error('invalid max_filesize specified')
- opts.max_filesize = numeric_limit
- if opts.sleep_interval is not None:
- if opts.sleep_interval < 0:
- parser.error('sleep interval must be positive or 0')
- if opts.max_sleep_interval is not None:
- if opts.max_sleep_interval < 0:
- parser.error('max sleep interval must be positive or 0')
- if opts.sleep_interval is None:
- parser.error('min sleep interval must be specified, use --min-sleep-interval')
- if opts.max_sleep_interval < opts.sleep_interval:
- parser.error('max sleep interval must be greater than or equal to min sleep interval')
- else:
- opts.max_sleep_interval = opts.sleep_interval
- if opts.ap_mso and opts.ap_mso not in MSO_INFO:
- parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
-
- def parse_retries(retries):
- if retries in ('inf', 'infinite'):
- parsed_retries = float('inf')
- else:
- try:
- parsed_retries = int(retries)
- except (TypeError, ValueError):
- parser.error('invalid retry count specified')
- return parsed_retries
- if opts.retries is not None:
- opts.retries = parse_retries(opts.retries)
- if opts.fragment_retries is not None:
- opts.fragment_retries = parse_retries(opts.fragment_retries)
- if opts.buffersize is not None:
- numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
- if numeric_buffersize is None:
- parser.error('invalid buffer size specified')
- opts.buffersize = numeric_buffersize
- if opts.http_chunk_size is not None:
- numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
- if not numeric_chunksize:
- parser.error('invalid http chunk size specified')
- opts.http_chunk_size = numeric_chunksize
- if opts.playliststart <= 0:
- raise ValueError('Playlist start must be positive')
- if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
- raise ValueError('Playlist end must be greater than playlist start')
- if opts.extractaudio:
- if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
- parser.error('invalid audio format specified')
- if opts.audioquality:
- opts.audioquality = opts.audioquality.strip('k').strip('K')
- if not opts.audioquality.isdigit():
- parser.error('invalid audio quality specified')
- if opts.recodevideo is not None:
- if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
- parser.error('invalid video recode format specified')
- if opts.convertsubtitles is not None:
- if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
- parser.error('invalid subtitle format specified')
-
- if opts.date is not None:
- date = DateRange.day(opts.date)
- else:
- date = DateRange(opts.dateafter, opts.datebefore)
-
- # With -x and no -k, prefer audio-only formats so the full video need not be downloaded
- if opts.extractaudio and not opts.keepvideo and opts.format is None:
- opts.format = 'bestaudio/best'
-
- # --all-sub automatically sets --write-sub if --write-auto-sub is not given
- # this was the old behaviour if only --all-sub was given.
- if opts.allsubtitles and not opts.writeautomaticsub:
- opts.writesubtitles = True
-
- outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
- or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
- or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
- or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
- or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
- or (opts.useid and '%(id)s.%(ext)s')
- or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
- or DEFAULT_OUTTMPL)
- if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
- parser.error('Cannot download a video and extract audio into the same'
- ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
- ' template'.format(outtmpl))
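In the or-chain above the first truthy branch wins: an explicit --output beats everything, then the legacy --format -1 cases, then the title/ID/autonumber combinations, then DEFAULT_OUTTMPL. A condensed sketch of that precedence (the legacy branches are omitted for brevity):

    from youtube_dl.utils import DEFAULT_OUTTMPL

    def pick_outtmpl(outtmpl=None, usetitle=False, useid=False, autonumber=False):
        return (outtmpl
                or (usetitle and autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
                or (usetitle and '%(title)s-%(id)s.%(ext)s')
                or (useid and '%(id)s.%(ext)s')
                or (autonumber and '%(autonumber)s-%(id)s.%(ext)s')
                or DEFAULT_OUTTMPL)

    print(pick_outtmpl(usetitle=True, autonumber=True))  # autonumber-title-id template
    print(pick_outtmpl(outtmpl='%(id)s.%(ext)s', usetitle=True))  # explicit template wins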
-
- any_getting = (
- opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail
- or opts.getdescription or opts.getfilename or opts.getformat
- or opts.getduration or opts.dumpjson or opts.dump_single_json)
- any_printing = opts.print_json
- download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
-
- # PostProcessors
- postprocessors = []
- if opts.metafromtitle:
- postprocessors.append({
- 'key': 'MetadataFromTitle',
- 'titleformat': opts.metafromtitle
- })
- if opts.extractaudio:
- postprocessors.append({
- 'key': 'FFmpegExtractAudio',
- 'preferredcodec': opts.audioformat,
- 'preferredquality': opts.audioquality,
- 'nopostoverwrites': opts.nopostoverwrites,
- })
- if opts.recodevideo:
- postprocessors.append({
- 'key': 'FFmpegVideoConvertor',
- 'preferedformat': opts.recodevideo,
- })
- # FFmpegMetadataPP should run after FFmpegVideoConvertorPP and
- # FFmpegExtractAudioPP, since containers before conversion may not support
- # metadata (3gp, webm, etc.). It should also be placed before other
- # metadata-manipulating post-processors (FFmpegEmbedSubtitle) to prevent
- # loss of extra metadata: by default ffmpeg preserves metadata applicable
- # to both source and target containers, and from this point the container
- # won't change, so metadata can safely be added here.
- if opts.addmetadata:
- postprocessors.append({'key': 'FFmpegMetadata'})
- if opts.convertsubtitles:
- postprocessors.append({
- 'key': 'FFmpegSubtitlesConvertor',
- 'format': opts.convertsubtitles,
- })
- if opts.embedsubtitles:
- postprocessors.append({
- 'key': 'FFmpegEmbedSubtitle',
- })
- if opts.embedthumbnail:
- already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
- postprocessors.append({
- 'key': 'EmbedThumbnail',
- 'already_have_thumbnail': already_have_thumbnail
- })
- if not already_have_thumbnail:
- opts.writethumbnail = True
- # XAttrMetadataPP should be run after post-processors that may change file
- # contents
- if opts.xattrs:
- postprocessors.append({'key': 'XAttrMetadata'})
- # Keep ExecAfterDownload towards the bottom: it lets the user modify the
- # final file in any way, so a postprocessor placed after it could fail if
- # the user's command removes or renames the file first.
- if opts.exec_cmd:
- postprocessors.append({
- 'key': 'ExecAfterDownload',
- 'exec_cmd': opts.exec_cmd,
- })
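For a run combining, say, -x --add-metadata --embed-thumbnail --exec, the list built above comes out in the fixed order the comments rely on; a sketch of the resulting entries (the flag combination is hypothetical):

    postprocessors = [
        {'key': 'FFmpegExtractAudio', 'preferredcodec': 'best',
         'preferredquality': None, 'nopostoverwrites': False},
        {'key': 'FFmpegMetadata'},  # after extraction, before embedding
        {'key': 'EmbedThumbnail', 'already_have_thumbnail': False},
        {'key': 'ExecAfterDownload', 'exec_cmd': 'echo {}'},  # always last
    ]
    print([pp['key'] for pp in postprocessors])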
- external_downloader_args = None
- if opts.external_downloader_args:
- external_downloader_args = compat_shlex_split(opts.external_downloader_args)
- postprocessor_args = None
- if opts.postprocessor_args:
- postprocessor_args = compat_shlex_split(opts.postprocessor_args)
- match_filter = (
- None if opts.match_filter is None
- else match_filter_func(opts.match_filter))
-
- ydl_opts = {
- 'usenetrc': opts.usenetrc,
- 'username': opts.username,
- 'password': opts.password,
- 'twofactor': opts.twofactor,
- 'videopassword': opts.videopassword,
- 'ap_mso': opts.ap_mso,
- 'ap_username': opts.ap_username,
- 'ap_password': opts.ap_password,
- 'quiet': (opts.quiet or any_getting or any_printing),
- 'no_warnings': opts.no_warnings,
- 'forceurl': opts.geturl,
- 'forcetitle': opts.gettitle,
- 'forceid': opts.getid,
- 'forcethumbnail': opts.getthumbnail,
- 'forcedescription': opts.getdescription,
- 'forceduration': opts.getduration,
- 'forcefilename': opts.getfilename,
- 'forceformat': opts.getformat,
- 'forcejson': opts.dumpjson or opts.print_json,
- 'dump_single_json': opts.dump_single_json,
- 'simulate': opts.simulate or any_getting,
- 'skip_download': opts.skip_download,
- 'format': opts.format,
- 'listformats': opts.listformats,
- 'outtmpl': outtmpl,
- 'autonumber_size': opts.autonumber_size,
- 'autonumber_start': opts.autonumber_start,
- 'restrictfilenames': opts.restrictfilenames,
- 'ignoreerrors': opts.ignoreerrors,
- 'force_generic_extractor': opts.force_generic_extractor,
- 'ratelimit': opts.ratelimit,
- 'nooverwrites': opts.nooverwrites,
- 'retries': opts.retries,
- 'fragment_retries': opts.fragment_retries,
- 'skip_unavailable_fragments': opts.skip_unavailable_fragments,
- 'keep_fragments': opts.keep_fragments,
- 'buffersize': opts.buffersize,
- 'noresizebuffer': opts.noresizebuffer,
- 'http_chunk_size': opts.http_chunk_size,
- 'continuedl': opts.continue_dl,
- 'noprogress': opts.noprogress,
- 'progress_with_newline': opts.progress_with_newline,
- 'playliststart': opts.playliststart,
- 'playlistend': opts.playlistend,
- 'playlistreverse': opts.playlist_reverse,
- 'playlistrandom': opts.playlist_random,
- 'noplaylist': opts.noplaylist,
- 'logtostderr': opts.outtmpl == '-',
- 'consoletitle': opts.consoletitle,
- 'nopart': opts.nopart,
- 'updatetime': opts.updatetime,
- 'writedescription': opts.writedescription,
- 'writeannotations': opts.writeannotations,
- 'writeinfojson': opts.writeinfojson,
- 'writethumbnail': opts.writethumbnail,
- 'write_all_thumbnails': opts.write_all_thumbnails,
- 'writesubtitles': opts.writesubtitles,
- 'writeautomaticsub': opts.writeautomaticsub,
- 'allsubtitles': opts.allsubtitles,
- 'listsubtitles': opts.listsubtitles,
- 'subtitlesformat': opts.subtitlesformat,
- 'subtitleslangs': opts.subtitleslangs,
- 'matchtitle': decodeOption(opts.matchtitle),
- 'rejecttitle': decodeOption(opts.rejecttitle),
- 'max_downloads': opts.max_downloads,
- 'prefer_free_formats': opts.prefer_free_formats,
- 'verbose': opts.verbose,
- 'dump_intermediate_pages': opts.dump_intermediate_pages,
- 'write_pages': opts.write_pages,
- 'test': opts.test,
- 'keepvideo': opts.keepvideo,
- 'min_filesize': opts.min_filesize,
- 'max_filesize': opts.max_filesize,
- 'min_views': opts.min_views,
- 'max_views': opts.max_views,
- 'daterange': date,
- 'cachedir': opts.cachedir,
- 'youtube_print_sig_code': opts.youtube_print_sig_code,
- 'age_limit': opts.age_limit,
- 'download_archive': download_archive_fn,
- 'cookiefile': opts.cookiefile,
- 'nocheckcertificate': opts.no_check_certificate,
- 'prefer_insecure': opts.prefer_insecure,
- 'proxy': opts.proxy,
- 'socket_timeout': opts.socket_timeout,
- 'bidi_workaround': opts.bidi_workaround,
- 'debug_printtraffic': opts.debug_printtraffic,
- 'prefer_ffmpeg': opts.prefer_ffmpeg,
- 'include_ads': opts.include_ads,
- 'default_search': opts.default_search,
- 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
- 'encoding': opts.encoding,
- 'extract_flat': opts.extract_flat,
- 'mark_watched': opts.mark_watched,
- 'merge_output_format': opts.merge_output_format,
- 'postprocessors': postprocessors,
- 'fixup': opts.fixup,
- 'source_address': opts.source_address,
- 'call_home': opts.call_home,
- 'sleep_interval': opts.sleep_interval,
- 'max_sleep_interval': opts.max_sleep_interval,
- 'external_downloader': opts.external_downloader,
- 'list_thumbnails': opts.list_thumbnails,
- 'playlist_items': opts.playlist_items,
- 'xattr_set_filesize': opts.xattr_set_filesize,
- 'match_filter': match_filter,
- 'no_color': opts.no_color,
- 'ffmpeg_location': opts.ffmpeg_location,
- 'hls_prefer_native': opts.hls_prefer_native,
- 'hls_use_mpegts': opts.hls_use_mpegts,
- 'external_downloader_args': external_downloader_args,
- 'postprocessor_args': postprocessor_args,
- 'cn_verification_proxy': opts.cn_verification_proxy,
- 'geo_verification_proxy': opts.geo_verification_proxy,
- 'config_location': opts.config_location,
- 'geo_bypass': opts.geo_bypass,
- 'geo_bypass_country': opts.geo_bypass_country,
- 'geo_bypass_ip_block': opts.geo_bypass_ip_block,
- # just for deprecation check
- 'autonumber': opts.autonumber if opts.autonumber is True else None,
- 'usetitle': opts.usetitle if opts.usetitle is True else None,
- }
-
- with YoutubeDL(ydl_opts) as ydl:
- # Update version
- if opts.update_self:
- update_self(ydl.to_screen, opts.verbose, ydl._opener)
-
- # Remove cache dir
- if opts.rm_cachedir:
- ydl.cache.remove()
-
- # Maybe do nothing (no URLs and no --load-info-json file were given)
- if (len(all_urls) < 1) and (opts.load_info_filename is None):
- if opts.update_self or opts.rm_cachedir:
- sys.exit()
-
- ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
- parser.error(
- 'You must provide at least one URL.\n'
- 'Type youtube-dl --help to see a list of all options.')
-
- try:
- if opts.load_info_filename is not None:
- retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
- else:
- retcode = ydl.download(all_urls)
- except MaxDownloadsReached:
- ydl.to_screen('--max-downloads limit reached, aborting.')
- retcode = 101
-
- sys.exit(retcode)
-
-
-def main(argv=None):
- try:
- _real_main(argv)
- except DownloadError:
- sys.exit(1)
- except SameFileError:
- sys.exit('ERROR: fixed output name but more than one file to download')
- except KeyboardInterrupt:
- sys.exit('\nERROR: Interrupted by user')
-
-
-__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
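The same YoutubeDL class that _real_main drives is also the public embedding API; the canonical minimal use, with an options dict shaped like ydl_opts above:

    from youtube_dl import YoutubeDL

    ydl_opts = {
        'format': 'bestaudio/best',             # same keys as ydl_opts above
        'outtmpl': '%(title)s-%(id)s.%(ext)s',
    }
    with YoutubeDL(ydl_opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])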
diff --git a/youtube_dl/__main__.py b/youtube_dl/__main__.py
deleted file mode 100755
index 138f5fbec..000000000
--- a/youtube_dl/__main__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/usr/bin/env python
-from __future__ import unicode_literals
-
-# Execute with
-# $ python youtube_dl/__main__.py (2.6+)
-# $ python -m youtube_dl (2.7+)
-
-import sys
-
-if __package__ is None and not hasattr(sys, 'frozen'):
- # direct call of __main__.py
- import os.path
- path = os.path.realpath(os.path.abspath(__file__))
- sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
-
-import youtube_dl
-
-if __name__ == '__main__':
- youtube_dl.main()
diff --git a/youtube_dl/cache.py b/youtube_dl/cache.py
deleted file mode 100644
index 7bdade1bd..000000000
--- a/youtube_dl/cache.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from __future__ import unicode_literals
-
-import errno
-import io
-import json
-import os
-import re
-import shutil
-import traceback
-
-from .compat import compat_getenv
-from .utils import (
- expand_path,
- write_json_file,
-)
-
-
-class Cache(object):
- def __init__(self, ydl):
- self._ydl = ydl
-
- def _get_root_dir(self):
- res = self._ydl.params.get('cachedir')
- if res is None:
- cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
- res = os.path.join(cache_root, 'youtube-dl')
- return expand_path(res)
-
- def _get_cache_fn(self, section, key, dtype):
- assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
- 'invalid section %r' % section
- assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
- return os.path.join(
- self._get_root_dir(), section, '%s.%s' % (key, dtype))
-
- @property
- def enabled(self):
- return self._ydl.params.get('cachedir') is not False
-
- def store(self, section, key, data, dtype='json'):
- assert dtype in ('json',)
-
- if not self.enabled:
- return
-
- fn = self._get_cache_fn(section, key, dtype)
- try:
- try:
- os.makedirs(os.path.dirname(fn))
- except OSError as ose:
- if ose.errno != errno.EEXIST:
- raise
- write_json_file(data, fn)
- except Exception:
- tb = traceback.format_exc()
- self._ydl.report_warning(
- 'Writing cache to %r failed: %s' % (fn, tb))
-
- def load(self, section, key, dtype='json', default=None):
- assert dtype in ('json',)
-
- if not self.enabled:
- return default
-
- cache_fn = self._get_cache_fn(section, key, dtype)
- try:
- try:
- with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
- return json.load(cachef)
- except ValueError:
- try:
- file_size = os.path.getsize(cache_fn)
- except (OSError, IOError) as oe:
- file_size = str(oe)
- self._ydl.report_warning(
- 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
- except IOError:
- pass # No cache available
-
- return default
-
- def remove(self):
- if not self.enabled:
- self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
- return
-
- cachedir = self._get_root_dir()
- # Safety check: refuse to delete a directory that doesn't look like a cache
- if not any((term in cachedir) for term in ('cache', 'tmp')):
- raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
-
- self._ydl.to_screen(
- 'Removing cache dir %s .' % cachedir, skip_eol=True)
- if os.path.exists(cachedir):
- self._ydl.to_screen('.', skip_eol=True)
- shutil.rmtree(cachedir)
- self._ydl.to_screen('.')
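A minimal sketch of the Cache round trip through a YoutubeDL instance (the cache directory and section/key names here are hypothetical; youtube-dl itself uses sections such as 'youtube-sigfuncs'):

    from youtube_dl import YoutubeDL

    ydl = YoutubeDL({'cachedir': '/tmp/ydl-cache'})
    ydl.cache.store('example-section', 'example-key', {'answer': 42})
    # Written to /tmp/ydl-cache/example-section/example-key.json
    print(ydl.cache.load('example-section', 'example-key', default={}))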
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
deleted file mode 100644
index c75ab131b..000000000
--- a/youtube_dl/compat.py
+++ /dev/null
@@ -1,3026 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import base64
-import binascii
-import collections
-import ctypes
-import email
-import getpass
-import io
-import itertools
-import optparse
-import os
-import platform
-import re
-import shlex
-import shutil
-import socket
-import struct
-import subprocess
-import sys
-import xml.etree.ElementTree
-
-
-try:
- import urllib.request as compat_urllib_request
-except ImportError: # Python 2
- import urllib2 as compat_urllib_request
-
-try:
- import urllib.error as compat_urllib_error
-except ImportError: # Python 2
- import urllib2 as compat_urllib_error
-
-try:
- import urllib.parse as compat_urllib_parse
-except ImportError: # Python 2
- import urllib as compat_urllib_parse
-
-try:
- from urllib.parse import urlparse as compat_urllib_parse_urlparse
-except ImportError: # Python 2
- from urlparse import urlparse as compat_urllib_parse_urlparse
-
-try:
- import urllib.parse as compat_urlparse
-except ImportError: # Python 2
- import urlparse as compat_urlparse
-
-try:
- import urllib.response as compat_urllib_response
-except ImportError: # Python 2
- import urllib as compat_urllib_response
-
-try:
- import http.cookiejar as compat_cookiejar
-except ImportError: # Python 2
- import cookielib as compat_cookiejar
-
-try:
- import http.cookies as compat_cookies
-except ImportError: # Python 2
- import Cookie as compat_cookies
-
-try:
- import html.entities as compat_html_entities
-except ImportError: # Python 2
- import htmlentitydefs as compat_html_entities
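Each try/except pair binds one compat_* name to whichever stdlib module exists, so calling code stays version-agnostic; for example:

    from youtube_dl.compat import compat_urllib_parse_urlparse

    # Same call on Python 2 (urlparse.urlparse) and Python 3 (urllib.parse.urlparse)
    parts = compat_urllib_parse_urlparse('https://example.com/watch?v=abc')
    print(parts.netloc, parts.query)  # example.com v=abc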
-
-try: # Python >= 3.3
- compat_html_entities_html5 = compat_html_entities.html5
-except AttributeError:
- # Copied from CPython 3.5.1 html/entities.py
- compat_html_entities_html5 = {
- 'Aacute': '\xc1',
- 'aacute': '\xe1',
- 'Aacute;': '\xc1',
- 'aacute;': '\xe1',
- 'Abreve;': '\u0102',
- 'abreve;': '\u0103',
- 'ac;': '\u223e',
- 'acd;': '\u223f',
- 'acE;': '\u223e\u0333',
- 'Acirc': '\xc2',
- 'acirc': '\xe2',
- 'Acirc;': '\xc2',
- 'acirc;': '\xe2',
- 'acute': '\xb4',
- 'acute;': '\xb4',
- 'Acy;': '\u0410',
- 'acy;': '\u0430',
- 'AElig': '\xc6',
- 'aelig': '\xe6',
- 'AElig;': '\xc6',
- 'aelig;': '\xe6',
- 'af;': '\u2061',
- 'Afr;': '\U0001d504',
- 'afr;': '\U0001d51e',
- 'Agrave': '\xc0',
- 'agrave': '\xe0',
- 'Agrave;': '\xc0',
- 'agrave;': '\xe0',
- 'alefsym;': '\u2135',
- 'aleph;': '\u2135',
- 'Alpha;': '\u0391',
- 'alpha;': '\u03b1',
- 'Amacr;': '\u0100',
- 'amacr;': '\u0101',
- 'amalg;': '\u2a3f',
- 'AMP': '&',
- 'amp': '&',
- 'AMP;': '&',
- 'amp;': '&',
- 'And;': '\u2a53',
- 'and;': '\u2227',
- 'andand;': '\u2a55',
- 'andd;': '\u2a5c',
- 'andslope;': '\u2a58',
- 'andv;': '\u2a5a',
- 'ang;': '\u2220',
- 'ange;': '\u29a4',
- 'angle;': '\u2220',
- 'angmsd;': '\u2221',
- 'angmsdaa;': '\u29a8',
- 'angmsdab;': '\u29a9',
- 'angmsdac;': '\u29aa',
- 'angmsdad;': '\u29ab',
- 'angmsdae;': '\u29ac',
- 'angmsdaf;': '\u29ad',
- 'angmsdag;': '\u29ae',
- 'angmsdah;': '\u29af',
- 'angrt;': '\u221f',
- 'angrtvb;': '\u22be',
- 'angrtvbd;': '\u299d',
- 'angsph;': '\u2222',
- 'angst;': '\xc5',
- 'angzarr;': '\u237c',
- 'Aogon;': '\u0104',
- 'aogon;': '\u0105',
- 'Aopf;': '\U0001d538',
- 'aopf;': '\U0001d552',
- 'ap;': '\u2248',
- 'apacir;': '\u2a6f',
- 'apE;': '\u2a70',
- 'ape;': '\u224a',
- 'apid;': '\u224b',
- 'apos;': "'",
- 'ApplyFunction;': '\u2061',
- 'approx;': '\u2248',
- 'approxeq;': '\u224a',
- 'Aring': '\xc5',
- 'aring': '\xe5',
- 'Aring;': '\xc5',
- 'aring;': '\xe5',
- 'Ascr;': '\U0001d49c',
- 'ascr;': '\U0001d4b6',
- 'Assign;': '\u2254',
- 'ast;': '*',
- 'asymp;': '\u2248',
- 'asympeq;': '\u224d',
- 'Atilde': '\xc3',
- 'atilde': '\xe3',
- 'Atilde;': '\xc3',
- 'atilde;': '\xe3',
- 'Auml': '\xc4',
- 'auml': '\xe4',
- 'Auml;': '\xc4',
- 'auml;': '\xe4',
- 'awconint;': '\u2233',
- 'awint;': '\u2a11',
- 'backcong;': '\u224c',
- 'backepsilon;': '\u03f6',
- 'backprime;': '\u2035',
- 'backsim;': '\u223d',
- 'backsimeq;': '\u22cd',
- 'Backslash;': '\u2216',
- 'Barv;': '\u2ae7',
- 'barvee;': '\u22bd',
- 'Barwed;': '\u2306',
- 'barwed;': '\u2305',
- 'barwedge;': '\u2305',
- 'bbrk;': '\u23b5',
- 'bbrktbrk;': '\u23b6',
- 'bcong;': '\u224c',
- 'Bcy;': '\u0411',
- 'bcy;': '\u0431',
- 'bdquo;': '\u201e',
- 'becaus;': '\u2235',
- 'Because;': '\u2235',
- 'because;': '\u2235',
- 'bemptyv;': '\u29b0',
- 'bepsi;': '\u03f6',
- 'bernou;': '\u212c',
- 'Bernoullis;': '\u212c',
- 'Beta;': '\u0392',
- 'beta;': '\u03b2',
- 'beth;': '\u2136',
- 'between;': '\u226c',
- 'Bfr;': '\U0001d505',
- 'bfr;': '\U0001d51f',
- 'bigcap;': '\u22c2',
- 'bigcirc;': '\u25ef',
- 'bigcup;': '\u22c3',
- 'bigodot;': '\u2a00',
- 'bigoplus;': '\u2a01',
- 'bigotimes;': '\u2a02',
- 'bigsqcup;': '\u2a06',
- 'bigstar;': '\u2605',
- 'bigtriangledown;': '\u25bd',
- 'bigtriangleup;': '\u25b3',
- 'biguplus;': '\u2a04',
- 'bigvee;': '\u22c1',
- 'bigwedge;': '\u22c0',
- 'bkarow;': '\u290d',
- 'blacklozenge;': '\u29eb',
- 'blacksquare;': '\u25aa',
- 'blacktriangle;': '\u25b4',
- 'blacktriangledown;': '\u25be',
- 'blacktriangleleft;': '\u25c2',
- 'blacktriangleright;': '\u25b8',
- 'blank;': '\u2423',
- 'blk12;': '\u2592',
- 'blk14;': '\u2591',
- 'blk34;': '\u2593',
- 'block;': '\u2588',
- 'bne;': '=\u20e5',
- 'bnequiv;': '\u2261\u20e5',
- 'bNot;': '\u2aed',
- 'bnot;': '\u2310',
- 'Bopf;': '\U0001d539',
- 'bopf;': '\U0001d553',
- 'bot;': '\u22a5',
- 'bottom;': '\u22a5',
- 'bowtie;': '\u22c8',
- 'boxbox;': '\u29c9',
- 'boxDL;': '\u2557',
- 'boxDl;': '\u2556',
- 'boxdL;': '\u2555',
- 'boxdl;': '\u2510',
- 'boxDR;': '\u2554',
- 'boxDr;': '\u2553',
- 'boxdR;': '\u2552',
- 'boxdr;': '\u250c',
- 'boxH;': '\u2550',
- 'boxh;': '\u2500',
- 'boxHD;': '\u2566',
- 'boxHd;': '\u2564',
- 'boxhD;': '\u2565',
- 'boxhd;': '\u252c',
- 'boxHU;': '\u2569',
- 'boxHu;': '\u2567',
- 'boxhU;': '\u2568',
- 'boxhu;': '\u2534',
- 'boxminus;': '\u229f',
- 'boxplus;': '\u229e',
- 'boxtimes;': '\u22a0',
- 'boxUL;': '\u255d',
- 'boxUl;': '\u255c',
- 'boxuL;': '\u255b',
- 'boxul;': '\u2518',
- 'boxUR;': '\u255a',
- 'boxUr;': '\u2559',
- 'boxuR;': '\u2558',
- 'boxur;': '\u2514',
- 'boxV;': '\u2551',
- 'boxv;': '\u2502',
- 'boxVH;': '\u256c',
- 'boxVh;': '\u256b',
- 'boxvH;': '\u256a',
- 'boxvh;': '\u253c',
- 'boxVL;': '\u2563',
- 'boxVl;': '\u2562',
- 'boxvL;': '\u2561',
- 'boxvl;': '\u2524',
- 'boxVR;': '\u2560',
- 'boxVr;': '\u255f',
- 'boxvR;': '\u255e',
- 'boxvr;': '\u251c',
- 'bprime;': '\u2035',
- 'Breve;': '\u02d8',
- 'breve;': '\u02d8',
- 'brvbar': '\xa6',
- 'brvbar;': '\xa6',
- 'Bscr;': '\u212c',
- 'bscr;': '\U0001d4b7',
- 'bsemi;': '\u204f',
- 'bsim;': '\u223d',
- 'bsime;': '\u22cd',
- 'bsol;': '\\',
- 'bsolb;': '\u29c5',
- 'bsolhsub;': '\u27c8',
- 'bull;': '\u2022',
- 'bullet;': '\u2022',
- 'bump;': '\u224e',
- 'bumpE;': '\u2aae',
- 'bumpe;': '\u224f',
- 'Bumpeq;': '\u224e',
- 'bumpeq;': '\u224f',
- 'Cacute;': '\u0106',
- 'cacute;': '\u0107',
- 'Cap;': '\u22d2',
- 'cap;': '\u2229',
- 'capand;': '\u2a44',
- 'capbrcup;': '\u2a49',
- 'capcap;': '\u2a4b',
- 'capcup;': '\u2a47',
- 'capdot;': '\u2a40',
- 'CapitalDifferentialD;': '\u2145',
- 'caps;': '\u2229\ufe00',
- 'caret;': '\u2041',
- 'caron;': '\u02c7',
- 'Cayleys;': '\u212d',
- 'ccaps;': '\u2a4d',
- 'Ccaron;': '\u010c',
- 'ccaron;': '\u010d',
- 'Ccedil': '\xc7',
- 'ccedil': '\xe7',
- 'Ccedil;': '\xc7',
- 'ccedil;': '\xe7',
- 'Ccirc;': '\u0108',
- 'ccirc;': '\u0109',
- 'Cconint;': '\u2230',
- 'ccups;': '\u2a4c',
- 'ccupssm;': '\u2a50',
- 'Cdot;': '\u010a',
- 'cdot;': '\u010b',
- 'cedil': '\xb8',
- 'cedil;': '\xb8',
- 'Cedilla;': '\xb8',
- 'cemptyv;': '\u29b2',
- 'cent': '\xa2',
- 'cent;': '\xa2',
- 'CenterDot;': '\xb7',
- 'centerdot;': '\xb7',
- 'Cfr;': '\u212d',
- 'cfr;': '\U0001d520',
- 'CHcy;': '\u0427',
- 'chcy;': '\u0447',
- 'check;': '\u2713',
- 'checkmark;': '\u2713',
- 'Chi;': '\u03a7',
- 'chi;': '\u03c7',
- 'cir;': '\u25cb',
- 'circ;': '\u02c6',
- 'circeq;': '\u2257',
- 'circlearrowleft;': '\u21ba',
- 'circlearrowright;': '\u21bb',
- 'circledast;': '\u229b',
- 'circledcirc;': '\u229a',
- 'circleddash;': '\u229d',
- 'CircleDot;': '\u2299',
- 'circledR;': '\xae',
- 'circledS;': '\u24c8',
- 'CircleMinus;': '\u2296',
- 'CirclePlus;': '\u2295',
- 'CircleTimes;': '\u2297',
- 'cirE;': '\u29c3',
- 'cire;': '\u2257',
- 'cirfnint;': '\u2a10',
- 'cirmid;': '\u2aef',
- 'cirscir;': '\u29c2',
- 'ClockwiseContourIntegral;': '\u2232',
- 'CloseCurlyDoubleQuote;': '\u201d',
- 'CloseCurlyQuote;': '\u2019',
- 'clubs;': '\u2663',
- 'clubsuit;': '\u2663',
- 'Colon;': '\u2237',
- 'colon;': ':',
- 'Colone;': '\u2a74',
- 'colone;': '\u2254',
- 'coloneq;': '\u2254',
- 'comma;': ',',
- 'commat;': '@',
- 'comp;': '\u2201',
- 'compfn;': '\u2218',
- 'complement;': '\u2201',
- 'complexes;': '\u2102',
- 'cong;': '\u2245',
- 'congdot;': '\u2a6d',
- 'Congruent;': '\u2261',
- 'Conint;': '\u222f',
- 'conint;': '\u222e',
- 'ContourIntegral;': '\u222e',
- 'Copf;': '\u2102',
- 'copf;': '\U0001d554',
- 'coprod;': '\u2210',
- 'Coproduct;': '\u2210',
- 'COPY': '\xa9',
- 'copy': '\xa9',
- 'COPY;': '\xa9',
- 'copy;': '\xa9',
- 'copysr;': '\u2117',
- 'CounterClockwiseContourIntegral;': '\u2233',
- 'crarr;': '\u21b5',
- 'Cross;': '\u2a2f',
- 'cross;': '\u2717',
- 'Cscr;': '\U0001d49e',
- 'cscr;': '\U0001d4b8',
- 'csub;': '\u2acf',
- 'csube;': '\u2ad1',
- 'csup;': '\u2ad0',
- 'csupe;': '\u2ad2',
- 'ctdot;': '\u22ef',
- 'cudarrl;': '\u2938',
- 'cudarrr;': '\u2935',
- 'cuepr;': '\u22de',
- 'cuesc;': '\u22df',
- 'cularr;': '\u21b6',
- 'cularrp;': '\u293d',
- 'Cup;': '\u22d3',
- 'cup;': '\u222a',
- 'cupbrcap;': '\u2a48',
- 'CupCap;': '\u224d',
- 'cupcap;': '\u2a46',
- 'cupcup;': '\u2a4a',
- 'cupdot;': '\u228d',
- 'cupor;': '\u2a45',
- 'cups;': '\u222a\ufe00',
- 'curarr;': '\u21b7',
- 'curarrm;': '\u293c',
- 'curlyeqprec;': '\u22de',
- 'curlyeqsucc;': '\u22df',
- 'curlyvee;': '\u22ce',
- 'curlywedge;': '\u22cf',
- 'curren': '\xa4',
- 'curren;': '\xa4',
- 'curvearrowleft;': '\u21b6',
- 'curvearrowright;': '\u21b7',
- 'cuvee;': '\u22ce',
- 'cuwed;': '\u22cf',
- 'cwconint;': '\u2232',
- 'cwint;': '\u2231',
- 'cylcty;': '\u232d',
- 'Dagger;': '\u2021',
- 'dagger;': '\u2020',
- 'daleth;': '\u2138',
- 'Darr;': '\u21a1',
- 'dArr;': '\u21d3',
- 'darr;': '\u2193',
- 'dash;': '\u2010',
- 'Dashv;': '\u2ae4',
- 'dashv;': '\u22a3',
- 'dbkarow;': '\u290f',
- 'dblac;': '\u02dd',
- 'Dcaron;': '\u010e',
- 'dcaron;': '\u010f',
- 'Dcy;': '\u0414',
- 'dcy;': '\u0434',
- 'DD;': '\u2145',
- 'dd;': '\u2146',
- 'ddagger;': '\u2021',
- 'ddarr;': '\u21ca',
- 'DDotrahd;': '\u2911',
- 'ddotseq;': '\u2a77',
- 'deg': '\xb0',
- 'deg;': '\xb0',
- 'Del;': '\u2207',
- 'Delta;': '\u0394',
- 'delta;': '\u03b4',
- 'demptyv;': '\u29b1',
- 'dfisht;': '\u297f',
- 'Dfr;': '\U0001d507',
- 'dfr;': '\U0001d521',
- 'dHar;': '\u2965',
- 'dharl;': '\u21c3',
- 'dharr;': '\u21c2',
- 'DiacriticalAcute;': '\xb4',
- 'DiacriticalDot;': '\u02d9',
- 'DiacriticalDoubleAcute;': '\u02dd',
- 'DiacriticalGrave;': '`',
- 'DiacriticalTilde;': '\u02dc',
- 'diam;': '\u22c4',
- 'Diamond;': '\u22c4',
- 'diamond;': '\u22c4',
- 'diamondsuit;': '\u2666',
- 'diams;': '\u2666',
- 'die;': '\xa8',
- 'DifferentialD;': '\u2146',
- 'digamma;': '\u03dd',
- 'disin;': '\u22f2',
- 'div;': '\xf7',
- 'divide': '\xf7',
- 'divide;': '\xf7',
- 'divideontimes;': '\u22c7',
- 'divonx;': '\u22c7',
- 'DJcy;': '\u0402',
- 'djcy;': '\u0452',
- 'dlcorn;': '\u231e',
- 'dlcrop;': '\u230d',
- 'dollar;': '$',
- 'Dopf;': '\U0001d53b',
- 'dopf;': '\U0001d555',
- 'Dot;': '\xa8',
- 'dot;': '\u02d9',
- 'DotDot;': '\u20dc',
- 'doteq;': '\u2250',
- 'doteqdot;': '\u2251',
- 'DotEqual;': '\u2250',
- 'dotminus;': '\u2238',
- 'dotplus;': '\u2214',
- 'dotsquare;': '\u22a1',
- 'doublebarwedge;': '\u2306',
- 'DoubleContourIntegral;': '\u222f',
- 'DoubleDot;': '\xa8',
- 'DoubleDownArrow;': '\u21d3',
- 'DoubleLeftArrow;': '\u21d0',
- 'DoubleLeftRightArrow;': '\u21d4',
- 'DoubleLeftTee;': '\u2ae4',
- 'DoubleLongLeftArrow;': '\u27f8',
- 'DoubleLongLeftRightArrow;': '\u27fa',
- 'DoubleLongRightArrow;': '\u27f9',
- 'DoubleRightArrow;': '\u21d2',
- 'DoubleRightTee;': '\u22a8',
- 'DoubleUpArrow;': '\u21d1',
- 'DoubleUpDownArrow;': '\u21d5',
- 'DoubleVerticalBar;': '\u2225',
- 'DownArrow;': '\u2193',
- 'Downarrow;': '\u21d3',
- 'downarrow;': '\u2193',
- 'DownArrowBar;': '\u2913',
- 'DownArrowUpArrow;': '\u21f5',
- 'DownBreve;': '\u0311',
- 'downdownarrows;': '\u21ca',
- 'downharpoonleft;': '\u21c3',
- 'downharpoonright;': '\u21c2',
- 'DownLeftRightVector;': '\u2950',
- 'DownLeftTeeVector;': '\u295e',
- 'DownLeftVector;': '\u21bd',
- 'DownLeftVectorBar;': '\u2956',
- 'DownRightTeeVector;': '\u295f',
- 'DownRightVector;': '\u21c1',
- 'DownRightVectorBar;': '\u2957',
- 'DownTee;': '\u22a4',
- 'DownTeeArrow;': '\u21a7',
- 'drbkarow;': '\u2910',
- 'drcorn;': '\u231f',
- 'drcrop;': '\u230c',
- 'Dscr;': '\U0001d49f',
- 'dscr;': '\U0001d4b9',
- 'DScy;': '\u0405',
- 'dscy;': '\u0455',
- 'dsol;': '\u29f6',
- 'Dstrok;': '\u0110',
- 'dstrok;': '\u0111',
- 'dtdot;': '\u22f1',
- 'dtri;': '\u25bf',
- 'dtrif;': '\u25be',
- 'duarr;': '\u21f5',
- 'duhar;': '\u296f',
- 'dwangle;': '\u29a6',
- 'DZcy;': '\u040f',
- 'dzcy;': '\u045f',
- 'dzigrarr;': '\u27ff',
- 'Eacute': '\xc9',
- 'eacute': '\xe9',
- 'Eacute;': '\xc9',
- 'eacute;': '\xe9',
- 'easter;': '\u2a6e',
- 'Ecaron;': '\u011a',
- 'ecaron;': '\u011b',
- 'ecir;': '\u2256',
- 'Ecirc': '\xca',
- 'ecirc': '\xea',
- 'Ecirc;': '\xca',
- 'ecirc;': '\xea',
- 'ecolon;': '\u2255',
- 'Ecy;': '\u042d',
- 'ecy;': '\u044d',
- 'eDDot;': '\u2a77',
- 'Edot;': '\u0116',
- 'eDot;': '\u2251',
- 'edot;': '\u0117',
- 'ee;': '\u2147',
- 'efDot;': '\u2252',
- 'Efr;': '\U0001d508',
- 'efr;': '\U0001d522',
- 'eg;': '\u2a9a',
- 'Egrave': '\xc8',
- 'egrave': '\xe8',
- 'Egrave;': '\xc8',
- 'egrave;': '\xe8',
- 'egs;': '\u2a96',
- 'egsdot;': '\u2a98',
- 'el;': '\u2a99',
- 'Element;': '\u2208',
- 'elinters;': '\u23e7',
- 'ell;': '\u2113',
- 'els;': '\u2a95',
- 'elsdot;': '\u2a97',
- 'Emacr;': '\u0112',
- 'emacr;': '\u0113',
- 'empty;': '\u2205',
- 'emptyset;': '\u2205',
- 'EmptySmallSquare;': '\u25fb',
- 'emptyv;': '\u2205',
- 'EmptyVerySmallSquare;': '\u25ab',
- 'emsp13;': '\u2004',
- 'emsp14;': '\u2005',
- 'emsp;': '\u2003',
- 'ENG;': '\u014a',
- 'eng;': '\u014b',
- 'ensp;': '\u2002',
- 'Eogon;': '\u0118',
- 'eogon;': '\u0119',
- 'Eopf;': '\U0001d53c',
- 'eopf;': '\U0001d556',
- 'epar;': '\u22d5',
- 'eparsl;': '\u29e3',
- 'eplus;': '\u2a71',
- 'epsi;': '\u03b5',
- 'Epsilon;': '\u0395',
- 'epsilon;': '\u03b5',
- 'epsiv;': '\u03f5',
- 'eqcirc;': '\u2256',
- 'eqcolon;': '\u2255',
- 'eqsim;': '\u2242',
- 'eqslantgtr;': '\u2a96',
- 'eqslantless;': '\u2a95',
- 'Equal;': '\u2a75',
- 'equals;': '=',
- 'EqualTilde;': '\u2242',
- 'equest;': '\u225f',
- 'Equilibrium;': '\u21cc',
- 'equiv;': '\u2261',
- 'equivDD;': '\u2a78',
- 'eqvparsl;': '\u29e5',
- 'erarr;': '\u2971',
- 'erDot;': '\u2253',
- 'Escr;': '\u2130',
- 'escr;': '\u212f',
- 'esdot;': '\u2250',
- 'Esim;': '\u2a73',
- 'esim;': '\u2242',
- 'Eta;': '\u0397',
- 'eta;': '\u03b7',
- 'ETH': '\xd0',
- 'eth': '\xf0',
- 'ETH;': '\xd0',
- 'eth;': '\xf0',
- 'Euml': '\xcb',
- 'euml': '\xeb',
- 'Euml;': '\xcb',
- 'euml;': '\xeb',
- 'euro;': '\u20ac',
- 'excl;': '!',
- 'exist;': '\u2203',
- 'Exists;': '\u2203',
- 'expectation;': '\u2130',
- 'ExponentialE;': '\u2147',
- 'exponentiale;': '\u2147',
- 'fallingdotseq;': '\u2252',
- 'Fcy;': '\u0424',
- 'fcy;': '\u0444',
- 'female;': '\u2640',
- 'ffilig;': '\ufb03',
- 'fflig;': '\ufb00',
- 'ffllig;': '\ufb04',
- 'Ffr;': '\U0001d509',
- 'ffr;': '\U0001d523',
- 'filig;': '\ufb01',
- 'FilledSmallSquare;': '\u25fc',
- 'FilledVerySmallSquare;': '\u25aa',
- 'fjlig;': 'fj',
- 'flat;': '\u266d',
- 'fllig;': '\ufb02',
- 'fltns;': '\u25b1',
- 'fnof;': '\u0192',
- 'Fopf;': '\U0001d53d',
- 'fopf;': '\U0001d557',
- 'ForAll;': '\u2200',
- 'forall;': '\u2200',
- 'fork;': '\u22d4',
- 'forkv;': '\u2ad9',
- 'Fouriertrf;': '\u2131',
- 'fpartint;': '\u2a0d',
- 'frac12': '\xbd',
- 'frac12;': '\xbd',
- 'frac13;': '\u2153',
- 'frac14': '\xbc',
- 'frac14;': '\xbc',
- 'frac15;': '\u2155',
- 'frac16;': '\u2159',
- 'frac18;': '\u215b',
- 'frac23;': '\u2154',
- 'frac25;': '\u2156',
- 'frac34': '\xbe',
- 'frac34;': '\xbe',
- 'frac35;': '\u2157',
- 'frac38;': '\u215c',
- 'frac45;': '\u2158',
- 'frac56;': '\u215a',
- 'frac58;': '\u215d',
- 'frac78;': '\u215e',
- 'frasl;': '\u2044',
- 'frown;': '\u2322',
- 'Fscr;': '\u2131',
- 'fscr;': '\U0001d4bb',
- 'gacute;': '\u01f5',
- 'Gamma;': '\u0393',
- 'gamma;': '\u03b3',
- 'Gammad;': '\u03dc',
- 'gammad;': '\u03dd',
- 'gap;': '\u2a86',
- 'Gbreve;': '\u011e',
- 'gbreve;': '\u011f',
- 'Gcedil;': '\u0122',
- 'Gcirc;': '\u011c',
- 'gcirc;': '\u011d',
- 'Gcy;': '\u0413',
- 'gcy;': '\u0433',
- 'Gdot;': '\u0120',
- 'gdot;': '\u0121',
- 'gE;': '\u2267',
- 'ge;': '\u2265',
- 'gEl;': '\u2a8c',
- 'gel;': '\u22db',
- 'geq;': '\u2265',
- 'geqq;': '\u2267',
- 'geqslant;': '\u2a7e',
- 'ges;': '\u2a7e',
- 'gescc;': '\u2aa9',
- 'gesdot;': '\u2a80',
- 'gesdoto;': '\u2a82',
- 'gesdotol;': '\u2a84',
- 'gesl;': '\u22db\ufe00',
- 'gesles;': '\u2a94',
- 'Gfr;': '\U0001d50a',
- 'gfr;': '\U0001d524',
- 'Gg;': '\u22d9',
- 'gg;': '\u226b',
- 'ggg;': '\u22d9',
- 'gimel;': '\u2137',
- 'GJcy;': '\u0403',
- 'gjcy;': '\u0453',
- 'gl;': '\u2277',
- 'gla;': '\u2aa5',
- 'glE;': '\u2a92',
- 'glj;': '\u2aa4',
- 'gnap;': '\u2a8a',
- 'gnapprox;': '\u2a8a',
- 'gnE;': '\u2269',
- 'gne;': '\u2a88',
- 'gneq;': '\u2a88',
- 'gneqq;': '\u2269',
- 'gnsim;': '\u22e7',
- 'Gopf;': '\U0001d53e',
- 'gopf;': '\U0001d558',
- 'grave;': '`',
- 'GreaterEqual;': '\u2265',
- 'GreaterEqualLess;': '\u22db',
- 'GreaterFullEqual;': '\u2267',
- 'GreaterGreater;': '\u2aa2',
- 'GreaterLess;': '\u2277',
- 'GreaterSlantEqual;': '\u2a7e',
- 'GreaterTilde;': '\u2273',
- 'Gscr;': '\U0001d4a2',
- 'gscr;': '\u210a',
- 'gsim;': '\u2273',
- 'gsime;': '\u2a8e',
- 'gsiml;': '\u2a90',
- 'GT': '>',
- 'gt': '>',
- 'GT;': '>',
- 'Gt;': '\u226b',
- 'gt;': '>',
- 'gtcc;': '\u2aa7',
- 'gtcir;': '\u2a7a',
- 'gtdot;': '\u22d7',
- 'gtlPar;': '\u2995',
- 'gtquest;': '\u2a7c',
- 'gtrapprox;': '\u2a86',
- 'gtrarr;': '\u2978',
- 'gtrdot;': '\u22d7',
- 'gtreqless;': '\u22db',
- 'gtreqqless;': '\u2a8c',
- 'gtrless;': '\u2277',
- 'gtrsim;': '\u2273',
- 'gvertneqq;': '\u2269\ufe00',
- 'gvnE;': '\u2269\ufe00',
- 'Hacek;': '\u02c7',
- 'hairsp;': '\u200a',
- 'half;': '\xbd',
- 'hamilt;': '\u210b',
- 'HARDcy;': '\u042a',
- 'hardcy;': '\u044a',
- 'hArr;': '\u21d4',
- 'harr;': '\u2194',
- 'harrcir;': '\u2948',
- 'harrw;': '\u21ad',
- 'Hat;': '^',
- 'hbar;': '\u210f',
- 'Hcirc;': '\u0124',
- 'hcirc;': '\u0125',
- 'hearts;': '\u2665',
- 'heartsuit;': '\u2665',
- 'hellip;': '\u2026',
- 'hercon;': '\u22b9',
- 'Hfr;': '\u210c',
- 'hfr;': '\U0001d525',
- 'HilbertSpace;': '\u210b',
- 'hksearow;': '\u2925',
- 'hkswarow;': '\u2926',
- 'hoarr;': '\u21ff',
- 'homtht;': '\u223b',
- 'hookleftarrow;': '\u21a9',
- 'hookrightarrow;': '\u21aa',
- 'Hopf;': '\u210d',
- 'hopf;': '\U0001d559',
- 'horbar;': '\u2015',
- 'HorizontalLine;': '\u2500',
- 'Hscr;': '\u210b',
- 'hscr;': '\U0001d4bd',
- 'hslash;': '\u210f',
- 'Hstrok;': '\u0126',
- 'hstrok;': '\u0127',
- 'HumpDownHump;': '\u224e',
- 'HumpEqual;': '\u224f',
- 'hybull;': '\u2043',
- 'hyphen;': '\u2010',
- 'Iacute': '\xcd',
- 'iacute': '\xed',
- 'Iacute;': '\xcd',
- 'iacute;': '\xed',
- 'ic;': '\u2063',
- 'Icirc': '\xce',
- 'icirc': '\xee',
- 'Icirc;': '\xce',
- 'icirc;': '\xee',
- 'Icy;': '\u0418',
- 'icy;': '\u0438',
- 'Idot;': '\u0130',
- 'IEcy;': '\u0415',
- 'iecy;': '\u0435',
- 'iexcl': '\xa1',
- 'iexcl;': '\xa1',
- 'iff;': '\u21d4',
- 'Ifr;': '\u2111',
- 'ifr;': '\U0001d526',
- 'Igrave': '\xcc',
- 'igrave': '\xec',
- 'Igrave;': '\xcc',
- 'igrave;': '\xec',
- 'ii;': '\u2148',
- 'iiiint;': '\u2a0c',
- 'iiint;': '\u222d',
- 'iinfin;': '\u29dc',
- 'iiota;': '\u2129',
- 'IJlig;': '\u0132',
- 'ijlig;': '\u0133',
- 'Im;': '\u2111',
- 'Imacr;': '\u012a',
- 'imacr;': '\u012b',
- 'image;': '\u2111',
- 'ImaginaryI;': '\u2148',
- 'imagline;': '\u2110',
- 'imagpart;': '\u2111',
- 'imath;': '\u0131',
- 'imof;': '\u22b7',
- 'imped;': '\u01b5',
- 'Implies;': '\u21d2',
- 'in;': '\u2208',
- 'incare;': '\u2105',
- 'infin;': '\u221e',
- 'infintie;': '\u29dd',
- 'inodot;': '\u0131',
- 'Int;': '\u222c',
- 'int;': '\u222b',
- 'intcal;': '\u22ba',
- 'integers;': '\u2124',
- 'Integral;': '\u222b',
- 'intercal;': '\u22ba',
- 'Intersection;': '\u22c2',
- 'intlarhk;': '\u2a17',
- 'intprod;': '\u2a3c',
- 'InvisibleComma;': '\u2063',
- 'InvisibleTimes;': '\u2062',
- 'IOcy;': '\u0401',
- 'iocy;': '\u0451',
- 'Iogon;': '\u012e',
- 'iogon;': '\u012f',
- 'Iopf;': '\U0001d540',
- 'iopf;': '\U0001d55a',
- 'Iota;': '\u0399',
- 'iota;': '\u03b9',
- 'iprod;': '\u2a3c',
- 'iquest': '\xbf',
- 'iquest;': '\xbf',
- 'Iscr;': '\u2110',
- 'iscr;': '\U0001d4be',
- 'isin;': '\u2208',
- 'isindot;': '\u22f5',
- 'isinE;': '\u22f9',
- 'isins;': '\u22f4',
- 'isinsv;': '\u22f3',
- 'isinv;': '\u2208',
- 'it;': '\u2062',
- 'Itilde;': '\u0128',
- 'itilde;': '\u0129',
- 'Iukcy;': '\u0406',
- 'iukcy;': '\u0456',
- 'Iuml': '\xcf',
- 'iuml': '\xef',
- 'Iuml;': '\xcf',
- 'iuml;': '\xef',
- 'Jcirc;': '\u0134',
- 'jcirc;': '\u0135',
- 'Jcy;': '\u0419',
- 'jcy;': '\u0439',
- 'Jfr;': '\U0001d50d',
- 'jfr;': '\U0001d527',
- 'jmath;': '\u0237',
- 'Jopf;': '\U0001d541',
- 'jopf;': '\U0001d55b',
- 'Jscr;': '\U0001d4a5',
- 'jscr;': '\U0001d4bf',
- 'Jsercy;': '\u0408',
- 'jsercy;': '\u0458',
- 'Jukcy;': '\u0404',
- 'jukcy;': '\u0454',
- 'Kappa;': '\u039a',
- 'kappa;': '\u03ba',
- 'kappav;': '\u03f0',
- 'Kcedil;': '\u0136',
- 'kcedil;': '\u0137',
- 'Kcy;': '\u041a',
- 'kcy;': '\u043a',
- 'Kfr;': '\U0001d50e',
- 'kfr;': '\U0001d528',
- 'kgreen;': '\u0138',
- 'KHcy;': '\u0425',
- 'khcy;': '\u0445',
- 'KJcy;': '\u040c',
- 'kjcy;': '\u045c',
- 'Kopf;': '\U0001d542',
- 'kopf;': '\U0001d55c',
- 'Kscr;': '\U0001d4a6',
- 'kscr;': '\U0001d4c0',
- 'lAarr;': '\u21da',
- 'Lacute;': '\u0139',
- 'lacute;': '\u013a',
- 'laemptyv;': '\u29b4',
- 'lagran;': '\u2112',
- 'Lambda;': '\u039b',
- 'lambda;': '\u03bb',
- 'Lang;': '\u27ea',
- 'lang;': '\u27e8',
- 'langd;': '\u2991',
- 'langle;': '\u27e8',
- 'lap;': '\u2a85',
- 'Laplacetrf;': '\u2112',
- 'laquo': '\xab',
- 'laquo;': '\xab',
- 'Larr;': '\u219e',
- 'lArr;': '\u21d0',
- 'larr;': '\u2190',
- 'larrb;': '\u21e4',
- 'larrbfs;': '\u291f',
- 'larrfs;': '\u291d',
- 'larrhk;': '\u21a9',
- 'larrlp;': '\u21ab',
- 'larrpl;': '\u2939',
- 'larrsim;': '\u2973',
- 'larrtl;': '\u21a2',
- 'lat;': '\u2aab',
- 'lAtail;': '\u291b',
- 'latail;': '\u2919',
- 'late;': '\u2aad',
- 'lates;': '\u2aad\ufe00',
- 'lBarr;': '\u290e',
- 'lbarr;': '\u290c',
- 'lbbrk;': '\u2772',
- 'lbrace;': '{',
- 'lbrack;': '[',
- 'lbrke;': '\u298b',
- 'lbrksld;': '\u298f',
- 'lbrkslu;': '\u298d',
- 'Lcaron;': '\u013d',
- 'lcaron;': '\u013e',
- 'Lcedil;': '\u013b',
- 'lcedil;': '\u013c',
- 'lceil;': '\u2308',
- 'lcub;': '{',
- 'Lcy;': '\u041b',
- 'lcy;': '\u043b',
- 'ldca;': '\u2936',
- 'ldquo;': '\u201c',
- 'ldquor;': '\u201e',
- 'ldrdhar;': '\u2967',
- 'ldrushar;': '\u294b',
- 'ldsh;': '\u21b2',
- 'lE;': '\u2266',
- 'le;': '\u2264',
- 'LeftAngleBracket;': '\u27e8',
- 'LeftArrow;': '\u2190',
- 'Leftarrow;': '\u21d0',
- 'leftarrow;': '\u2190',
- 'LeftArrowBar;': '\u21e4',
- 'LeftArrowRightArrow;': '\u21c6',
- 'leftarrowtail;': '\u21a2',
- 'LeftCeiling;': '\u2308',
- 'LeftDoubleBracket;': '\u27e6',
- 'LeftDownTeeVector;': '\u2961',
- 'LeftDownVector;': '\u21c3',
- 'LeftDownVectorBar;': '\u2959',
- 'LeftFloor;': '\u230a',
- 'leftharpoondown;': '\u21bd',
- 'leftharpoonup;': '\u21bc',
- 'leftleftarrows;': '\u21c7',
- 'LeftRightArrow;': '\u2194',
- 'Leftrightarrow;': '\u21d4',
- 'leftrightarrow;': '\u2194',
- 'leftrightarrows;': '\u21c6',
- 'leftrightharpoons;': '\u21cb',
- 'leftrightsquigarrow;': '\u21ad',
- 'LeftRightVector;': '\u294e',
- 'LeftTee;': '\u22a3',
- 'LeftTeeArrow;': '\u21a4',
- 'LeftTeeVector;': '\u295a',
- 'leftthreetimes;': '\u22cb',
- 'LeftTriangle;': '\u22b2',
- 'LeftTriangleBar;': '\u29cf',
- 'LeftTriangleEqual;': '\u22b4',
- 'LeftUpDownVector;': '\u2951',
- 'LeftUpTeeVector;': '\u2960',
- 'LeftUpVector;': '\u21bf',
- 'LeftUpVectorBar;': '\u2958',
- 'LeftVector;': '\u21bc',
- 'LeftVectorBar;': '\u2952',
- 'lEg;': '\u2a8b',
- 'leg;': '\u22da',
- 'leq;': '\u2264',
- 'leqq;': '\u2266',
- 'leqslant;': '\u2a7d',
- 'les;': '\u2a7d',
- 'lescc;': '\u2aa8',
- 'lesdot;': '\u2a7f',
- 'lesdoto;': '\u2a81',
- 'lesdotor;': '\u2a83',
- 'lesg;': '\u22da\ufe00',
- 'lesges;': '\u2a93',
- 'lessapprox;': '\u2a85',
- 'lessdot;': '\u22d6',
- 'lesseqgtr;': '\u22da',
- 'lesseqqgtr;': '\u2a8b',
- 'LessEqualGreater;': '\u22da',
- 'LessFullEqual;': '\u2266',
- 'LessGreater;': '\u2276',
- 'lessgtr;': '\u2276',
- 'LessLess;': '\u2aa1',
- 'lesssim;': '\u2272',
- 'LessSlantEqual;': '\u2a7d',
- 'LessTilde;': '\u2272',
- 'lfisht;': '\u297c',
- 'lfloor;': '\u230a',
- 'Lfr;': '\U0001d50f',
- 'lfr;': '\U0001d529',
- 'lg;': '\u2276',
- 'lgE;': '\u2a91',
- 'lHar;': '\u2962',
- 'lhard;': '\u21bd',
- 'lharu;': '\u21bc',
- 'lharul;': '\u296a',
- 'lhblk;': '\u2584',
- 'LJcy;': '\u0409',
- 'ljcy;': '\u0459',
- 'Ll;': '\u22d8',
- 'll;': '\u226a',
- 'llarr;': '\u21c7',
- 'llcorner;': '\u231e',
- 'Lleftarrow;': '\u21da',
- 'llhard;': '\u296b',
- 'lltri;': '\u25fa',
- 'Lmidot;': '\u013f',
- 'lmidot;': '\u0140',
- 'lmoust;': '\u23b0',
- 'lmoustache;': '\u23b0',
- 'lnap;': '\u2a89',
- 'lnapprox;': '\u2a89',
- 'lnE;': '\u2268',
- 'lne;': '\u2a87',
- 'lneq;': '\u2a87',
- 'lneqq;': '\u2268',
- 'lnsim;': '\u22e6',
- 'loang;': '\u27ec',
- 'loarr;': '\u21fd',
- 'lobrk;': '\u27e6',
- 'LongLeftArrow;': '\u27f5',
- 'Longleftarrow;': '\u27f8',
- 'longleftarrow;': '\u27f5',
- 'LongLeftRightArrow;': '\u27f7',
- 'Longleftrightarrow;': '\u27fa',
- 'longleftrightarrow;': '\u27f7',
- 'longmapsto;': '\u27fc',
- 'LongRightArrow;': '\u27f6',
- 'Longrightarrow;': '\u27f9',
- 'longrightarrow;': '\u27f6',
- 'looparrowleft;': '\u21ab',
- 'looparrowright;': '\u21ac',
- 'lopar;': '\u2985',
- 'Lopf;': '\U0001d543',
- 'lopf;': '\U0001d55d',
- 'loplus;': '\u2a2d',
- 'lotimes;': '\u2a34',
- 'lowast;': '\u2217',
- 'lowbar;': '_',
- 'LowerLeftArrow;': '\u2199',
- 'LowerRightArrow;': '\u2198',
- 'loz;': '\u25ca',
- 'lozenge;': '\u25ca',
- 'lozf;': '\u29eb',
- 'lpar;': '(',
- 'lparlt;': '\u2993',
- 'lrarr;': '\u21c6',
- 'lrcorner;': '\u231f',
- 'lrhar;': '\u21cb',
- 'lrhard;': '\u296d',
- 'lrm;': '\u200e',
- 'lrtri;': '\u22bf',
- 'lsaquo;': '\u2039',
- 'Lscr;': '\u2112',
- 'lscr;': '\U0001d4c1',
- 'Lsh;': '\u21b0',
- 'lsh;': '\u21b0',
- 'lsim;': '\u2272',
- 'lsime;': '\u2a8d',
- 'lsimg;': '\u2a8f',
- 'lsqb;': '[',
- 'lsquo;': '\u2018',
- 'lsquor;': '\u201a',
- 'Lstrok;': '\u0141',
- 'lstrok;': '\u0142',
- 'LT': '<',
- 'lt': '<',
- 'LT;': '<',
- 'Lt;': '\u226a',
- 'lt;': '<',
- 'ltcc;': '\u2aa6',
- 'ltcir;': '\u2a79',
- 'ltdot;': '\u22d6',
- 'lthree;': '\u22cb',
- 'ltimes;': '\u22c9',
- 'ltlarr;': '\u2976',
- 'ltquest;': '\u2a7b',
- 'ltri;': '\u25c3',
- 'ltrie;': '\u22b4',
- 'ltrif;': '\u25c2',
- 'ltrPar;': '\u2996',
- 'lurdshar;': '\u294a',
- 'luruhar;': '\u2966',
- 'lvertneqq;': '\u2268\ufe00',
- 'lvnE;': '\u2268\ufe00',
- 'macr': '\xaf',
- 'macr;': '\xaf',
- 'male;': '\u2642',
- 'malt;': '\u2720',
- 'maltese;': '\u2720',
- 'Map;': '\u2905',
- 'map;': '\u21a6',
- 'mapsto;': '\u21a6',
- 'mapstodown;': '\u21a7',
- 'mapstoleft;': '\u21a4',
- 'mapstoup;': '\u21a5',
- 'marker;': '\u25ae',
- 'mcomma;': '\u2a29',
- 'Mcy;': '\u041c',
- 'mcy;': '\u043c',
- 'mdash;': '\u2014',
- 'mDDot;': '\u223a',
- 'measuredangle;': '\u2221',
- 'MediumSpace;': '\u205f',
- 'Mellintrf;': '\u2133',
- 'Mfr;': '\U0001d510',
- 'mfr;': '\U0001d52a',
- 'mho;': '\u2127',
- 'micro': '\xb5',
- 'micro;': '\xb5',
- 'mid;': '\u2223',
- 'midast;': '*',
- 'midcir;': '\u2af0',
- 'middot': '\xb7',
- 'middot;': '\xb7',
- 'minus;': '\u2212',
- 'minusb;': '\u229f',
- 'minusd;': '\u2238',
- 'minusdu;': '\u2a2a',
- 'MinusPlus;': '\u2213',
- 'mlcp;': '\u2adb',
- 'mldr;': '\u2026',
- 'mnplus;': '\u2213',
- 'models;': '\u22a7',
- 'Mopf;': '\U0001d544',
- 'mopf;': '\U0001d55e',
- 'mp;': '\u2213',
- 'Mscr;': '\u2133',
- 'mscr;': '\U0001d4c2',
- 'mstpos;': '\u223e',
- 'Mu;': '\u039c',
- 'mu;': '\u03bc',
- 'multimap;': '\u22b8',
- 'mumap;': '\u22b8',
- 'nabla;': '\u2207',
- 'Nacute;': '\u0143',
- 'nacute;': '\u0144',
- 'nang;': '\u2220\u20d2',
- 'nap;': '\u2249',
- 'napE;': '\u2a70\u0338',
- 'napid;': '\u224b\u0338',
- 'napos;': '\u0149',
- 'napprox;': '\u2249',
- 'natur;': '\u266e',
- 'natural;': '\u266e',
- 'naturals;': '\u2115',
- 'nbsp': '\xa0',
- 'nbsp;': '\xa0',
- 'nbump;': '\u224e\u0338',
- 'nbumpe;': '\u224f\u0338',
- 'ncap;': '\u2a43',
- 'Ncaron;': '\u0147',
- 'ncaron;': '\u0148',
- 'Ncedil;': '\u0145',
- 'ncedil;': '\u0146',
- 'ncong;': '\u2247',
- 'ncongdot;': '\u2a6d\u0338',
- 'ncup;': '\u2a42',
- 'Ncy;': '\u041d',
- 'ncy;': '\u043d',
- 'ndash;': '\u2013',
- 'ne;': '\u2260',
- 'nearhk;': '\u2924',
- 'neArr;': '\u21d7',
- 'nearr;': '\u2197',
- 'nearrow;': '\u2197',
- 'nedot;': '\u2250\u0338',
- 'NegativeMediumSpace;': '\u200b',
- 'NegativeThickSpace;': '\u200b',
- 'NegativeThinSpace;': '\u200b',
- 'NegativeVeryThinSpace;': '\u200b',
- 'nequiv;': '\u2262',
- 'nesear;': '\u2928',
- 'nesim;': '\u2242\u0338',
- 'NestedGreaterGreater;': '\u226b',
- 'NestedLessLess;': '\u226a',
- 'NewLine;': '\n',
- 'nexist;': '\u2204',
- 'nexists;': '\u2204',
- 'Nfr;': '\U0001d511',
- 'nfr;': '\U0001d52b',
- 'ngE;': '\u2267\u0338',
- 'nge;': '\u2271',
- 'ngeq;': '\u2271',
- 'ngeqq;': '\u2267\u0338',
- 'ngeqslant;': '\u2a7e\u0338',
- 'nges;': '\u2a7e\u0338',
- 'nGg;': '\u22d9\u0338',
- 'ngsim;': '\u2275',
- 'nGt;': '\u226b\u20d2',
- 'ngt;': '\u226f',
- 'ngtr;': '\u226f',
- 'nGtv;': '\u226b\u0338',
- 'nhArr;': '\u21ce',
- 'nharr;': '\u21ae',
- 'nhpar;': '\u2af2',
- 'ni;': '\u220b',
- 'nis;': '\u22fc',
- 'nisd;': '\u22fa',
- 'niv;': '\u220b',
- 'NJcy;': '\u040a',
- 'njcy;': '\u045a',
- 'nlArr;': '\u21cd',
- 'nlarr;': '\u219a',
- 'nldr;': '\u2025',
- 'nlE;': '\u2266\u0338',
- 'nle;': '\u2270',
- 'nLeftarrow;': '\u21cd',
- 'nleftarrow;': '\u219a',
- 'nLeftrightarrow;': '\u21ce',
- 'nleftrightarrow;': '\u21ae',
- 'nleq;': '\u2270',
- 'nleqq;': '\u2266\u0338',
- 'nleqslant;': '\u2a7d\u0338',
- 'nles;': '\u2a7d\u0338',
- 'nless;': '\u226e',
- 'nLl;': '\u22d8\u0338',
- 'nlsim;': '\u2274',
- 'nLt;': '\u226a\u20d2',
- 'nlt;': '\u226e',
- 'nltri;': '\u22ea',
- 'nltrie;': '\u22ec',
- 'nLtv;': '\u226a\u0338',
- 'nmid;': '\u2224',
- 'NoBreak;': '\u2060',
- 'NonBreakingSpace;': '\xa0',
- 'Nopf;': '\u2115',
- 'nopf;': '\U0001d55f',
- 'not': '\xac',
- 'Not;': '\u2aec',
- 'not;': '\xac',
- 'NotCongruent;': '\u2262',
- 'NotCupCap;': '\u226d',
- 'NotDoubleVerticalBar;': '\u2226',
- 'NotElement;': '\u2209',
- 'NotEqual;': '\u2260',
- 'NotEqualTilde;': '\u2242\u0338',
- 'NotExists;': '\u2204',
- 'NotGreater;': '\u226f',
- 'NotGreaterEqual;': '\u2271',
- 'NotGreaterFullEqual;': '\u2267\u0338',
- 'NotGreaterGreater;': '\u226b\u0338',
- 'NotGreaterLess;': '\u2279',
- 'NotGreaterSlantEqual;': '\u2a7e\u0338',
- 'NotGreaterTilde;': '\u2275',
- 'NotHumpDownHump;': '\u224e\u0338',
- 'NotHumpEqual;': '\u224f\u0338',
- 'notin;': '\u2209',
- 'notindot;': '\u22f5\u0338',
- 'notinE;': '\u22f9\u0338',
- 'notinva;': '\u2209',
- 'notinvb;': '\u22f7',
- 'notinvc;': '\u22f6',
- 'NotLeftTriangle;': '\u22ea',
- 'NotLeftTriangleBar;': '\u29cf\u0338',
- 'NotLeftTriangleEqual;': '\u22ec',
- 'NotLess;': '\u226e',
- 'NotLessEqual;': '\u2270',
- 'NotLessGreater;': '\u2278',
- 'NotLessLess;': '\u226a\u0338',
- 'NotLessSlantEqual;': '\u2a7d\u0338',
- 'NotLessTilde;': '\u2274',
- 'NotNestedGreaterGreater;': '\u2aa2\u0338',
- 'NotNestedLessLess;': '\u2aa1\u0338',
- 'notni;': '\u220c',
- 'notniva;': '\u220c',
- 'notnivb;': '\u22fe',
- 'notnivc;': '\u22fd',
- 'NotPrecedes;': '\u2280',
- 'NotPrecedesEqual;': '\u2aaf\u0338',
- 'NotPrecedesSlantEqual;': '\u22e0',
- 'NotReverseElement;': '\u220c',
- 'NotRightTriangle;': '\u22eb',
- 'NotRightTriangleBar;': '\u29d0\u0338',
- 'NotRightTriangleEqual;': '\u22ed',
- 'NotSquareSubset;': '\u228f\u0338',
- 'NotSquareSubsetEqual;': '\u22e2',
- 'NotSquareSuperset;': '\u2290\u0338',
- 'NotSquareSupersetEqual;': '\u22e3',
- 'NotSubset;': '\u2282\u20d2',
- 'NotSubsetEqual;': '\u2288',
- 'NotSucceeds;': '\u2281',
- 'NotSucceedsEqual;': '\u2ab0\u0338',
- 'NotSucceedsSlantEqual;': '\u22e1',
- 'NotSucceedsTilde;': '\u227f\u0338',
- 'NotSuperset;': '\u2283\u20d2',
- 'NotSupersetEqual;': '\u2289',
- 'NotTilde;': '\u2241',
- 'NotTildeEqual;': '\u2244',
- 'NotTildeFullEqual;': '\u2247',
- 'NotTildeTilde;': '\u2249',
- 'NotVerticalBar;': '\u2224',
- 'npar;': '\u2226',
- 'nparallel;': '\u2226',
- 'nparsl;': '\u2afd\u20e5',
- 'npart;': '\u2202\u0338',
- 'npolint;': '\u2a14',
- 'npr;': '\u2280',
- 'nprcue;': '\u22e0',
- 'npre;': '\u2aaf\u0338',
- 'nprec;': '\u2280',
- 'npreceq;': '\u2aaf\u0338',
- 'nrArr;': '\u21cf',
- 'nrarr;': '\u219b',
- 'nrarrc;': '\u2933\u0338',
- 'nrarrw;': '\u219d\u0338',
- 'nRightarrow;': '\u21cf',
- 'nrightarrow;': '\u219b',
- 'nrtri;': '\u22eb',
- 'nrtrie;': '\u22ed',
- 'nsc;': '\u2281',
- 'nsccue;': '\u22e1',
- 'nsce;': '\u2ab0\u0338',
- 'Nscr;': '\U0001d4a9',
- 'nscr;': '\U0001d4c3',
- 'nshortmid;': '\u2224',
- 'nshortparallel;': '\u2226',
- 'nsim;': '\u2241',
- 'nsime;': '\u2244',
- 'nsimeq;': '\u2244',
- 'nsmid;': '\u2224',
- 'nspar;': '\u2226',
- 'nsqsube;': '\u22e2',
- 'nsqsupe;': '\u22e3',
- 'nsub;': '\u2284',
- 'nsubE;': '\u2ac5\u0338',
- 'nsube;': '\u2288',
- 'nsubset;': '\u2282\u20d2',
- 'nsubseteq;': '\u2288',
- 'nsubseteqq;': '\u2ac5\u0338',
- 'nsucc;': '\u2281',
- 'nsucceq;': '\u2ab0\u0338',
- 'nsup;': '\u2285',
- 'nsupE;': '\u2ac6\u0338',
- 'nsupe;': '\u2289',
- 'nsupset;': '\u2283\u20d2',
- 'nsupseteq;': '\u2289',
- 'nsupseteqq;': '\u2ac6\u0338',
- 'ntgl;': '\u2279',
- 'Ntilde': '\xd1',
- 'ntilde': '\xf1',
- 'Ntilde;': '\xd1',
- 'ntilde;': '\xf1',
- 'ntlg;': '\u2278',
- 'ntriangleleft;': '\u22ea',
- 'ntrianglelefteq;': '\u22ec',
- 'ntriangleright;': '\u22eb',
- 'ntrianglerighteq;': '\u22ed',
- 'Nu;': '\u039d',
- 'nu;': '\u03bd',
- 'num;': '#',
- 'numero;': '\u2116',
- 'numsp;': '\u2007',
- 'nvap;': '\u224d\u20d2',
- 'nVDash;': '\u22af',
- 'nVdash;': '\u22ae',
- 'nvDash;': '\u22ad',
- 'nvdash;': '\u22ac',
- 'nvge;': '\u2265\u20d2',
- 'nvgt;': '>\u20d2',
- 'nvHarr;': '\u2904',
- 'nvinfin;': '\u29de',
- 'nvlArr;': '\u2902',
- 'nvle;': '\u2264\u20d2',
- 'nvlt;': '<\u20d2',
- 'nvltrie;': '\u22b4\u20d2',
- 'nvrArr;': '\u2903',
- 'nvrtrie;': '\u22b5\u20d2',
- 'nvsim;': '\u223c\u20d2',
- 'nwarhk;': '\u2923',
- 'nwArr;': '\u21d6',
- 'nwarr;': '\u2196',
- 'nwarrow;': '\u2196',
- 'nwnear;': '\u2927',
- 'Oacute': '\xd3',
- 'oacute': '\xf3',
- 'Oacute;': '\xd3',
- 'oacute;': '\xf3',
- 'oast;': '\u229b',
- 'ocir;': '\u229a',
- 'Ocirc': '\xd4',
- 'ocirc': '\xf4',
- 'Ocirc;': '\xd4',
- 'ocirc;': '\xf4',
- 'Ocy;': '\u041e',
- 'ocy;': '\u043e',
- 'odash;': '\u229d',
- 'Odblac;': '\u0150',
- 'odblac;': '\u0151',
- 'odiv;': '\u2a38',
- 'odot;': '\u2299',
- 'odsold;': '\u29bc',
- 'OElig;': '\u0152',
- 'oelig;': '\u0153',
- 'ofcir;': '\u29bf',
- 'Ofr;': '\U0001d512',
- 'ofr;': '\U0001d52c',
- 'ogon;': '\u02db',
- 'Ograve': '\xd2',
- 'ograve': '\xf2',
- 'Ograve;': '\xd2',
- 'ograve;': '\xf2',
- 'ogt;': '\u29c1',
- 'ohbar;': '\u29b5',
- 'ohm;': '\u03a9',
- 'oint;': '\u222e',
- 'olarr;': '\u21ba',
- 'olcir;': '\u29be',
- 'olcross;': '\u29bb',
- 'oline;': '\u203e',
- 'olt;': '\u29c0',
- 'Omacr;': '\u014c',
- 'omacr;': '\u014d',
- 'Omega;': '\u03a9',
- 'omega;': '\u03c9',
- 'Omicron;': '\u039f',
- 'omicron;': '\u03bf',
- 'omid;': '\u29b6',
- 'ominus;': '\u2296',
- 'Oopf;': '\U0001d546',
- 'oopf;': '\U0001d560',
- 'opar;': '\u29b7',
- 'OpenCurlyDoubleQuote;': '\u201c',
- 'OpenCurlyQuote;': '\u2018',
- 'operp;': '\u29b9',
- 'oplus;': '\u2295',
- 'Or;': '\u2a54',
- 'or;': '\u2228',
- 'orarr;': '\u21bb',
- 'ord;': '\u2a5d',
- 'order;': '\u2134',
- 'orderof;': '\u2134',
- 'ordf': '\xaa',
- 'ordf;': '\xaa',
- 'ordm': '\xba',
- 'ordm;': '\xba',
- 'origof;': '\u22b6',
- 'oror;': '\u2a56',
- 'orslope;': '\u2a57',
- 'orv;': '\u2a5b',
- 'oS;': '\u24c8',
- 'Oscr;': '\U0001d4aa',
- 'oscr;': '\u2134',
- 'Oslash': '\xd8',
- 'oslash': '\xf8',
- 'Oslash;': '\xd8',
- 'oslash;': '\xf8',
- 'osol;': '\u2298',
- 'Otilde': '\xd5',
- 'otilde': '\xf5',
- 'Otilde;': '\xd5',
- 'otilde;': '\xf5',
- 'Otimes;': '\u2a37',
- 'otimes;': '\u2297',
- 'otimesas;': '\u2a36',
- 'Ouml': '\xd6',
- 'ouml': '\xf6',
- 'Ouml;': '\xd6',
- 'ouml;': '\xf6',
- 'ovbar;': '\u233d',
- 'OverBar;': '\u203e',
- 'OverBrace;': '\u23de',
- 'OverBracket;': '\u23b4',
- 'OverParenthesis;': '\u23dc',
- 'par;': '\u2225',
- 'para': '\xb6',
- 'para;': '\xb6',
- 'parallel;': '\u2225',
- 'parsim;': '\u2af3',
- 'parsl;': '\u2afd',
- 'part;': '\u2202',
- 'PartialD;': '\u2202',
- 'Pcy;': '\u041f',
- 'pcy;': '\u043f',
- 'percnt;': '%',
- 'period;': '.',
- 'permil;': '\u2030',
- 'perp;': '\u22a5',
- 'pertenk;': '\u2031',
- 'Pfr;': '\U0001d513',
- 'pfr;': '\U0001d52d',
- 'Phi;': '\u03a6',
- 'phi;': '\u03c6',
- 'phiv;': '\u03d5',
- 'phmmat;': '\u2133',
- 'phone;': '\u260e',
- 'Pi;': '\u03a0',
- 'pi;': '\u03c0',
- 'pitchfork;': '\u22d4',
- 'piv;': '\u03d6',
- 'planck;': '\u210f',
- 'planckh;': '\u210e',
- 'plankv;': '\u210f',
- 'plus;': '+',
- 'plusacir;': '\u2a23',
- 'plusb;': '\u229e',
- 'pluscir;': '\u2a22',
- 'plusdo;': '\u2214',
- 'plusdu;': '\u2a25',
- 'pluse;': '\u2a72',
- 'PlusMinus;': '\xb1',
- 'plusmn': '\xb1',
- 'plusmn;': '\xb1',
- 'plussim;': '\u2a26',
- 'plustwo;': '\u2a27',
- 'pm;': '\xb1',
- 'Poincareplane;': '\u210c',
- 'pointint;': '\u2a15',
- 'Popf;': '\u2119',
- 'popf;': '\U0001d561',
- 'pound': '\xa3',
- 'pound;': '\xa3',
- 'Pr;': '\u2abb',
- 'pr;': '\u227a',
- 'prap;': '\u2ab7',
- 'prcue;': '\u227c',
- 'prE;': '\u2ab3',
- 'pre;': '\u2aaf',
- 'prec;': '\u227a',
- 'precapprox;': '\u2ab7',
- 'preccurlyeq;': '\u227c',
- 'Precedes;': '\u227a',
- 'PrecedesEqual;': '\u2aaf',
- 'PrecedesSlantEqual;': '\u227c',
- 'PrecedesTilde;': '\u227e',
- 'preceq;': '\u2aaf',
- 'precnapprox;': '\u2ab9',
- 'precneqq;': '\u2ab5',
- 'precnsim;': '\u22e8',
- 'precsim;': '\u227e',
- 'Prime;': '\u2033',
- 'prime;': '\u2032',
- 'primes;': '\u2119',
- 'prnap;': '\u2ab9',
- 'prnE;': '\u2ab5',
- 'prnsim;': '\u22e8',
- 'prod;': '\u220f',
- 'Product;': '\u220f',
- 'profalar;': '\u232e',
- 'profline;': '\u2312',
- 'profsurf;': '\u2313',
- 'prop;': '\u221d',
- 'Proportion;': '\u2237',
- 'Proportional;': '\u221d',
- 'propto;': '\u221d',
- 'prsim;': '\u227e',
- 'prurel;': '\u22b0',
- 'Pscr;': '\U0001d4ab',
- 'pscr;': '\U0001d4c5',
- 'Psi;': '\u03a8',
- 'psi;': '\u03c8',
- 'puncsp;': '\u2008',
- 'Qfr;': '\U0001d514',
- 'qfr;': '\U0001d52e',
- 'qint;': '\u2a0c',
- 'Qopf;': '\u211a',
- 'qopf;': '\U0001d562',
- 'qprime;': '\u2057',
- 'Qscr;': '\U0001d4ac',
- 'qscr;': '\U0001d4c6',
- 'quaternions;': '\u210d',
- 'quatint;': '\u2a16',
- 'quest;': '?',
- 'questeq;': '\u225f',
- 'QUOT': '"',
- 'quot': '"',
- 'QUOT;': '"',
- 'quot;': '"',
- 'rAarr;': '\u21db',
- 'race;': '\u223d\u0331',
- 'Racute;': '\u0154',
- 'racute;': '\u0155',
- 'radic;': '\u221a',
- 'raemptyv;': '\u29b3',
- 'Rang;': '\u27eb',
- 'rang;': '\u27e9',
- 'rangd;': '\u2992',
- 'range;': '\u29a5',
- 'rangle;': '\u27e9',
- 'raquo': '\xbb',
- 'raquo;': '\xbb',
- 'Rarr;': '\u21a0',
- 'rArr;': '\u21d2',
- 'rarr;': '\u2192',
- 'rarrap;': '\u2975',
- 'rarrb;': '\u21e5',
- 'rarrbfs;': '\u2920',
- 'rarrc;': '\u2933',
- 'rarrfs;': '\u291e',
- 'rarrhk;': '\u21aa',
- 'rarrlp;': '\u21ac',
- 'rarrpl;': '\u2945',
- 'rarrsim;': '\u2974',
- 'Rarrtl;': '\u2916',
- 'rarrtl;': '\u21a3',
- 'rarrw;': '\u219d',
- 'rAtail;': '\u291c',
- 'ratail;': '\u291a',
- 'ratio;': '\u2236',
- 'rationals;': '\u211a',
- 'RBarr;': '\u2910',
- 'rBarr;': '\u290f',
- 'rbarr;': '\u290d',
- 'rbbrk;': '\u2773',
- 'rbrace;': '}',
- 'rbrack;': ']',
- 'rbrke;': '\u298c',
- 'rbrksld;': '\u298e',
- 'rbrkslu;': '\u2990',
- 'Rcaron;': '\u0158',
- 'rcaron;': '\u0159',
- 'Rcedil;': '\u0156',
- 'rcedil;': '\u0157',
- 'rceil;': '\u2309',
- 'rcub;': '}',
- 'Rcy;': '\u0420',
- 'rcy;': '\u0440',
- 'rdca;': '\u2937',
- 'rdldhar;': '\u2969',
- 'rdquo;': '\u201d',
- 'rdquor;': '\u201d',
- 'rdsh;': '\u21b3',
- 'Re;': '\u211c',
- 'real;': '\u211c',
- 'realine;': '\u211b',
- 'realpart;': '\u211c',
- 'reals;': '\u211d',
- 'rect;': '\u25ad',
- 'REG': '\xae',
- 'reg': '\xae',
- 'REG;': '\xae',
- 'reg;': '\xae',
- 'ReverseElement;': '\u220b',
- 'ReverseEquilibrium;': '\u21cb',
- 'ReverseUpEquilibrium;': '\u296f',
- 'rfisht;': '\u297d',
- 'rfloor;': '\u230b',
- 'Rfr;': '\u211c',
- 'rfr;': '\U0001d52f',
- 'rHar;': '\u2964',
- 'rhard;': '\u21c1',
- 'rharu;': '\u21c0',
- 'rharul;': '\u296c',
- 'Rho;': '\u03a1',
- 'rho;': '\u03c1',
- 'rhov;': '\u03f1',
- 'RightAngleBracket;': '\u27e9',
- 'RightArrow;': '\u2192',
- 'Rightarrow;': '\u21d2',
- 'rightarrow;': '\u2192',
- 'RightArrowBar;': '\u21e5',
- 'RightArrowLeftArrow;': '\u21c4',
- 'rightarrowtail;': '\u21a3',
- 'RightCeiling;': '\u2309',
- 'RightDoubleBracket;': '\u27e7',
- 'RightDownTeeVector;': '\u295d',
- 'RightDownVector;': '\u21c2',
- 'RightDownVectorBar;': '\u2955',
- 'RightFloor;': '\u230b',
- 'rightharpoondown;': '\u21c1',
- 'rightharpoonup;': '\u21c0',
- 'rightleftarrows;': '\u21c4',
- 'rightleftharpoons;': '\u21cc',
- 'rightrightarrows;': '\u21c9',
- 'rightsquigarrow;': '\u219d',
- 'RightTee;': '\u22a2',
- 'RightTeeArrow;': '\u21a6',
- 'RightTeeVector;': '\u295b',
- 'rightthreetimes;': '\u22cc',
- 'RightTriangle;': '\u22b3',
- 'RightTriangleBar;': '\u29d0',
- 'RightTriangleEqual;': '\u22b5',
- 'RightUpDownVector;': '\u294f',
- 'RightUpTeeVector;': '\u295c',
- 'RightUpVector;': '\u21be',
- 'RightUpVectorBar;': '\u2954',
- 'RightVector;': '\u21c0',
- 'RightVectorBar;': '\u2953',
- 'ring;': '\u02da',
- 'risingdotseq;': '\u2253',
- 'rlarr;': '\u21c4',
- 'rlhar;': '\u21cc',
- 'rlm;': '\u200f',
- 'rmoust;': '\u23b1',
- 'rmoustache;': '\u23b1',
- 'rnmid;': '\u2aee',
- 'roang;': '\u27ed',
- 'roarr;': '\u21fe',
- 'robrk;': '\u27e7',
- 'ropar;': '\u2986',
- 'Ropf;': '\u211d',
- 'ropf;': '\U0001d563',
- 'roplus;': '\u2a2e',
- 'rotimes;': '\u2a35',
- 'RoundImplies;': '\u2970',
- 'rpar;': ')',
- 'rpargt;': '\u2994',
- 'rppolint;': '\u2a12',
- 'rrarr;': '\u21c9',
- 'Rrightarrow;': '\u21db',
- 'rsaquo;': '\u203a',
- 'Rscr;': '\u211b',
- 'rscr;': '\U0001d4c7',
- 'Rsh;': '\u21b1',
- 'rsh;': '\u21b1',
- 'rsqb;': ']',
- 'rsquo;': '\u2019',
- 'rsquor;': '\u2019',
- 'rthree;': '\u22cc',
- 'rtimes;': '\u22ca',
- 'rtri;': '\u25b9',
- 'rtrie;': '\u22b5',
- 'rtrif;': '\u25b8',
- 'rtriltri;': '\u29ce',
- 'RuleDelayed;': '\u29f4',
- 'ruluhar;': '\u2968',
- 'rx;': '\u211e',
- 'Sacute;': '\u015a',
- 'sacute;': '\u015b',
- 'sbquo;': '\u201a',
- 'Sc;': '\u2abc',
- 'sc;': '\u227b',
- 'scap;': '\u2ab8',
- 'Scaron;': '\u0160',
- 'scaron;': '\u0161',
- 'sccue;': '\u227d',
- 'scE;': '\u2ab4',
- 'sce;': '\u2ab0',
- 'Scedil;': '\u015e',
- 'scedil;': '\u015f',
- 'Scirc;': '\u015c',
- 'scirc;': '\u015d',
- 'scnap;': '\u2aba',
- 'scnE;': '\u2ab6',
- 'scnsim;': '\u22e9',
- 'scpolint;': '\u2a13',
- 'scsim;': '\u227f',
- 'Scy;': '\u0421',
- 'scy;': '\u0441',
- 'sdot;': '\u22c5',
- 'sdotb;': '\u22a1',
- 'sdote;': '\u2a66',
- 'searhk;': '\u2925',
- 'seArr;': '\u21d8',
- 'searr;': '\u2198',
- 'searrow;': '\u2198',
- 'sect': '\xa7',
- 'sect;': '\xa7',
- 'semi;': ';',
- 'seswar;': '\u2929',
- 'setminus;': '\u2216',
- 'setmn;': '\u2216',
- 'sext;': '\u2736',
- 'Sfr;': '\U0001d516',
- 'sfr;': '\U0001d530',
- 'sfrown;': '\u2322',
- 'sharp;': '\u266f',
- 'SHCHcy;': '\u0429',
- 'shchcy;': '\u0449',
- 'SHcy;': '\u0428',
- 'shcy;': '\u0448',
- 'ShortDownArrow;': '\u2193',
- 'ShortLeftArrow;': '\u2190',
- 'shortmid;': '\u2223',
- 'shortparallel;': '\u2225',
- 'ShortRightArrow;': '\u2192',
- 'ShortUpArrow;': '\u2191',
- 'shy': '\xad',
- 'shy;': '\xad',
- 'Sigma;': '\u03a3',
- 'sigma;': '\u03c3',
- 'sigmaf;': '\u03c2',
- 'sigmav;': '\u03c2',
- 'sim;': '\u223c',
- 'simdot;': '\u2a6a',
- 'sime;': '\u2243',
- 'simeq;': '\u2243',
- 'simg;': '\u2a9e',
- 'simgE;': '\u2aa0',
- 'siml;': '\u2a9d',
- 'simlE;': '\u2a9f',
- 'simne;': '\u2246',
- 'simplus;': '\u2a24',
- 'simrarr;': '\u2972',
- 'slarr;': '\u2190',
- 'SmallCircle;': '\u2218',
- 'smallsetminus;': '\u2216',
- 'smashp;': '\u2a33',
- 'smeparsl;': '\u29e4',
- 'smid;': '\u2223',
- 'smile;': '\u2323',
- 'smt;': '\u2aaa',
- 'smte;': '\u2aac',
- 'smtes;': '\u2aac\ufe00',
- 'SOFTcy;': '\u042c',
- 'softcy;': '\u044c',
- 'sol;': '/',
- 'solb;': '\u29c4',
- 'solbar;': '\u233f',
- 'Sopf;': '\U0001d54a',
- 'sopf;': '\U0001d564',
- 'spades;': '\u2660',
- 'spadesuit;': '\u2660',
- 'spar;': '\u2225',
- 'sqcap;': '\u2293',
- 'sqcaps;': '\u2293\ufe00',
- 'sqcup;': '\u2294',
- 'sqcups;': '\u2294\ufe00',
- 'Sqrt;': '\u221a',
- 'sqsub;': '\u228f',
- 'sqsube;': '\u2291',
- 'sqsubset;': '\u228f',
- 'sqsubseteq;': '\u2291',
- 'sqsup;': '\u2290',
- 'sqsupe;': '\u2292',
- 'sqsupset;': '\u2290',
- 'sqsupseteq;': '\u2292',
- 'squ;': '\u25a1',
- 'Square;': '\u25a1',
- 'square;': '\u25a1',
- 'SquareIntersection;': '\u2293',
- 'SquareSubset;': '\u228f',
- 'SquareSubsetEqual;': '\u2291',
- 'SquareSuperset;': '\u2290',
- 'SquareSupersetEqual;': '\u2292',
- 'SquareUnion;': '\u2294',
- 'squarf;': '\u25aa',
- 'squf;': '\u25aa',
- 'srarr;': '\u2192',
- 'Sscr;': '\U0001d4ae',
- 'sscr;': '\U0001d4c8',
- 'ssetmn;': '\u2216',
- 'ssmile;': '\u2323',
- 'sstarf;': '\u22c6',
- 'Star;': '\u22c6',
- 'star;': '\u2606',
- 'starf;': '\u2605',
- 'straightepsilon;': '\u03f5',
- 'straightphi;': '\u03d5',
- 'strns;': '\xaf',
- 'Sub;': '\u22d0',
- 'sub;': '\u2282',
- 'subdot;': '\u2abd',
- 'subE;': '\u2ac5',
- 'sube;': '\u2286',
- 'subedot;': '\u2ac3',
- 'submult;': '\u2ac1',
- 'subnE;': '\u2acb',
- 'subne;': '\u228a',
- 'subplus;': '\u2abf',
- 'subrarr;': '\u2979',
- 'Subset;': '\u22d0',
- 'subset;': '\u2282',
- 'subseteq;': '\u2286',
- 'subseteqq;': '\u2ac5',
- 'SubsetEqual;': '\u2286',
- 'subsetneq;': '\u228a',
- 'subsetneqq;': '\u2acb',
- 'subsim;': '\u2ac7',
- 'subsub;': '\u2ad5',
- 'subsup;': '\u2ad3',
- 'succ;': '\u227b',
- 'succapprox;': '\u2ab8',
- 'succcurlyeq;': '\u227d',
- 'Succeeds;': '\u227b',
- 'SucceedsEqual;': '\u2ab0',
- 'SucceedsSlantEqual;': '\u227d',
- 'SucceedsTilde;': '\u227f',
- 'succeq;': '\u2ab0',
- 'succnapprox;': '\u2aba',
- 'succneqq;': '\u2ab6',
- 'succnsim;': '\u22e9',
- 'succsim;': '\u227f',
- 'SuchThat;': '\u220b',
- 'Sum;': '\u2211',
- 'sum;': '\u2211',
- 'sung;': '\u266a',
- 'sup1': '\xb9',
- 'sup1;': '\xb9',
- 'sup2': '\xb2',
- 'sup2;': '\xb2',
- 'sup3': '\xb3',
- 'sup3;': '\xb3',
- 'Sup;': '\u22d1',
- 'sup;': '\u2283',
- 'supdot;': '\u2abe',
- 'supdsub;': '\u2ad8',
- 'supE;': '\u2ac6',
- 'supe;': '\u2287',
- 'supedot;': '\u2ac4',
- 'Superset;': '\u2283',
- 'SupersetEqual;': '\u2287',
- 'suphsol;': '\u27c9',
- 'suphsub;': '\u2ad7',
- 'suplarr;': '\u297b',
- 'supmult;': '\u2ac2',
- 'supnE;': '\u2acc',
- 'supne;': '\u228b',
- 'supplus;': '\u2ac0',
- 'Supset;': '\u22d1',
- 'supset;': '\u2283',
- 'supseteq;': '\u2287',
- 'supseteqq;': '\u2ac6',
- 'supsetneq;': '\u228b',
- 'supsetneqq;': '\u2acc',
- 'supsim;': '\u2ac8',
- 'supsub;': '\u2ad4',
- 'supsup;': '\u2ad6',
- 'swarhk;': '\u2926',
- 'swArr;': '\u21d9',
- 'swarr;': '\u2199',
- 'swarrow;': '\u2199',
- 'swnwar;': '\u292a',
- 'szlig': '\xdf',
- 'szlig;': '\xdf',
- 'Tab;': '\t',
- 'target;': '\u2316',
- 'Tau;': '\u03a4',
- 'tau;': '\u03c4',
- 'tbrk;': '\u23b4',
- 'Tcaron;': '\u0164',
- 'tcaron;': '\u0165',
- 'Tcedil;': '\u0162',
- 'tcedil;': '\u0163',
- 'Tcy;': '\u0422',
- 'tcy;': '\u0442',
- 'tdot;': '\u20db',
- 'telrec;': '\u2315',
- 'Tfr;': '\U0001d517',
- 'tfr;': '\U0001d531',
- 'there4;': '\u2234',
- 'Therefore;': '\u2234',
- 'therefore;': '\u2234',
- 'Theta;': '\u0398',
- 'theta;': '\u03b8',
- 'thetasym;': '\u03d1',
- 'thetav;': '\u03d1',
- 'thickapprox;': '\u2248',
- 'thicksim;': '\u223c',
- 'ThickSpace;': '\u205f\u200a',
- 'thinsp;': '\u2009',
- 'ThinSpace;': '\u2009',
- 'thkap;': '\u2248',
- 'thksim;': '\u223c',
- 'THORN': '\xde',
- 'thorn': '\xfe',
- 'THORN;': '\xde',
- 'thorn;': '\xfe',
- 'Tilde;': '\u223c',
- 'tilde;': '\u02dc',
- 'TildeEqual;': '\u2243',
- 'TildeFullEqual;': '\u2245',
- 'TildeTilde;': '\u2248',
- 'times': '\xd7',
- 'times;': '\xd7',
- 'timesb;': '\u22a0',
- 'timesbar;': '\u2a31',
- 'timesd;': '\u2a30',
- 'tint;': '\u222d',
- 'toea;': '\u2928',
- 'top;': '\u22a4',
- 'topbot;': '\u2336',
- 'topcir;': '\u2af1',
- 'Topf;': '\U0001d54b',
- 'topf;': '\U0001d565',
- 'topfork;': '\u2ada',
- 'tosa;': '\u2929',
- 'tprime;': '\u2034',
- 'TRADE;': '\u2122',
- 'trade;': '\u2122',
- 'triangle;': '\u25b5',
- 'triangledown;': '\u25bf',
- 'triangleleft;': '\u25c3',
- 'trianglelefteq;': '\u22b4',
- 'triangleq;': '\u225c',
- 'triangleright;': '\u25b9',
- 'trianglerighteq;': '\u22b5',
- 'tridot;': '\u25ec',
- 'trie;': '\u225c',
- 'triminus;': '\u2a3a',
- 'TripleDot;': '\u20db',
- 'triplus;': '\u2a39',
- 'trisb;': '\u29cd',
- 'tritime;': '\u2a3b',
- 'trpezium;': '\u23e2',
- 'Tscr;': '\U0001d4af',
- 'tscr;': '\U0001d4c9',
- 'TScy;': '\u0426',
- 'tscy;': '\u0446',
- 'TSHcy;': '\u040b',
- 'tshcy;': '\u045b',
- 'Tstrok;': '\u0166',
- 'tstrok;': '\u0167',
- 'twixt;': '\u226c',
- 'twoheadleftarrow;': '\u219e',
- 'twoheadrightarrow;': '\u21a0',
- 'Uacute': '\xda',
- 'uacute': '\xfa',
- 'Uacute;': '\xda',
- 'uacute;': '\xfa',
- 'Uarr;': '\u219f',
- 'uArr;': '\u21d1',
- 'uarr;': '\u2191',
- 'Uarrocir;': '\u2949',
- 'Ubrcy;': '\u040e',
- 'ubrcy;': '\u045e',
- 'Ubreve;': '\u016c',
- 'ubreve;': '\u016d',
- 'Ucirc': '\xdb',
- 'ucirc': '\xfb',
- 'Ucirc;': '\xdb',
- 'ucirc;': '\xfb',
- 'Ucy;': '\u0423',
- 'ucy;': '\u0443',
- 'udarr;': '\u21c5',
- 'Udblac;': '\u0170',
- 'udblac;': '\u0171',
- 'udhar;': '\u296e',
- 'ufisht;': '\u297e',
- 'Ufr;': '\U0001d518',
- 'ufr;': '\U0001d532',
- 'Ugrave': '\xd9',
- 'ugrave': '\xf9',
- 'Ugrave;': '\xd9',
- 'ugrave;': '\xf9',
- 'uHar;': '\u2963',
- 'uharl;': '\u21bf',
- 'uharr;': '\u21be',
- 'uhblk;': '\u2580',
- 'ulcorn;': '\u231c',
- 'ulcorner;': '\u231c',
- 'ulcrop;': '\u230f',
- 'ultri;': '\u25f8',
- 'Umacr;': '\u016a',
- 'umacr;': '\u016b',
- 'uml': '\xa8',
- 'uml;': '\xa8',
- 'UnderBar;': '_',
- 'UnderBrace;': '\u23df',
- 'UnderBracket;': '\u23b5',
- 'UnderParenthesis;': '\u23dd',
- 'Union;': '\u22c3',
- 'UnionPlus;': '\u228e',
- 'Uogon;': '\u0172',
- 'uogon;': '\u0173',
- 'Uopf;': '\U0001d54c',
- 'uopf;': '\U0001d566',
- 'UpArrow;': '\u2191',
- 'Uparrow;': '\u21d1',
- 'uparrow;': '\u2191',
- 'UpArrowBar;': '\u2912',
- 'UpArrowDownArrow;': '\u21c5',
- 'UpDownArrow;': '\u2195',
- 'Updownarrow;': '\u21d5',
- 'updownarrow;': '\u2195',
- 'UpEquilibrium;': '\u296e',
- 'upharpoonleft;': '\u21bf',
- 'upharpoonright;': '\u21be',
- 'uplus;': '\u228e',
- 'UpperLeftArrow;': '\u2196',
- 'UpperRightArrow;': '\u2197',
- 'Upsi;': '\u03d2',
- 'upsi;': '\u03c5',
- 'upsih;': '\u03d2',
- 'Upsilon;': '\u03a5',
- 'upsilon;': '\u03c5',
- 'UpTee;': '\u22a5',
- 'UpTeeArrow;': '\u21a5',
- 'upuparrows;': '\u21c8',
- 'urcorn;': '\u231d',
- 'urcorner;': '\u231d',
- 'urcrop;': '\u230e',
- 'Uring;': '\u016e',
- 'uring;': '\u016f',
- 'urtri;': '\u25f9',
- 'Uscr;': '\U0001d4b0',
- 'uscr;': '\U0001d4ca',
- 'utdot;': '\u22f0',
- 'Utilde;': '\u0168',
- 'utilde;': '\u0169',
- 'utri;': '\u25b5',
- 'utrif;': '\u25b4',
- 'uuarr;': '\u21c8',
- 'Uuml': '\xdc',
- 'uuml': '\xfc',
- 'Uuml;': '\xdc',
- 'uuml;': '\xfc',
- 'uwangle;': '\u29a7',
- 'vangrt;': '\u299c',
- 'varepsilon;': '\u03f5',
- 'varkappa;': '\u03f0',
- 'varnothing;': '\u2205',
- 'varphi;': '\u03d5',
- 'varpi;': '\u03d6',
- 'varpropto;': '\u221d',
- 'vArr;': '\u21d5',
- 'varr;': '\u2195',
- 'varrho;': '\u03f1',
- 'varsigma;': '\u03c2',
- 'varsubsetneq;': '\u228a\ufe00',
- 'varsubsetneqq;': '\u2acb\ufe00',
- 'varsupsetneq;': '\u228b\ufe00',
- 'varsupsetneqq;': '\u2acc\ufe00',
- 'vartheta;': '\u03d1',
- 'vartriangleleft;': '\u22b2',
- 'vartriangleright;': '\u22b3',
- 'Vbar;': '\u2aeb',
- 'vBar;': '\u2ae8',
- 'vBarv;': '\u2ae9',
- 'Vcy;': '\u0412',
- 'vcy;': '\u0432',
- 'VDash;': '\u22ab',
- 'Vdash;': '\u22a9',
- 'vDash;': '\u22a8',
- 'vdash;': '\u22a2',
- 'Vdashl;': '\u2ae6',
- 'Vee;': '\u22c1',
- 'vee;': '\u2228',
- 'veebar;': '\u22bb',
- 'veeeq;': '\u225a',
- 'vellip;': '\u22ee',
- 'Verbar;': '\u2016',
- 'verbar;': '|',
- 'Vert;': '\u2016',
- 'vert;': '|',
- 'VerticalBar;': '\u2223',
- 'VerticalLine;': '|',
- 'VerticalSeparator;': '\u2758',
- 'VerticalTilde;': '\u2240',
- 'VeryThinSpace;': '\u200a',
- 'Vfr;': '\U0001d519',
- 'vfr;': '\U0001d533',
- 'vltri;': '\u22b2',
- 'vnsub;': '\u2282\u20d2',
- 'vnsup;': '\u2283\u20d2',
- 'Vopf;': '\U0001d54d',
- 'vopf;': '\U0001d567',
- 'vprop;': '\u221d',
- 'vrtri;': '\u22b3',
- 'Vscr;': '\U0001d4b1',
- 'vscr;': '\U0001d4cb',
- 'vsubnE;': '\u2acb\ufe00',
- 'vsubne;': '\u228a\ufe00',
- 'vsupnE;': '\u2acc\ufe00',
- 'vsupne;': '\u228b\ufe00',
- 'Vvdash;': '\u22aa',
- 'vzigzag;': '\u299a',
- 'Wcirc;': '\u0174',
- 'wcirc;': '\u0175',
- 'wedbar;': '\u2a5f',
- 'Wedge;': '\u22c0',
- 'wedge;': '\u2227',
- 'wedgeq;': '\u2259',
- 'weierp;': '\u2118',
- 'Wfr;': '\U0001d51a',
- 'wfr;': '\U0001d534',
- 'Wopf;': '\U0001d54e',
- 'wopf;': '\U0001d568',
- 'wp;': '\u2118',
- 'wr;': '\u2240',
- 'wreath;': '\u2240',
- 'Wscr;': '\U0001d4b2',
- 'wscr;': '\U0001d4cc',
- 'xcap;': '\u22c2',
- 'xcirc;': '\u25ef',
- 'xcup;': '\u22c3',
- 'xdtri;': '\u25bd',
- 'Xfr;': '\U0001d51b',
- 'xfr;': '\U0001d535',
- 'xhArr;': '\u27fa',
- 'xharr;': '\u27f7',
- 'Xi;': '\u039e',
- 'xi;': '\u03be',
- 'xlArr;': '\u27f8',
- 'xlarr;': '\u27f5',
- 'xmap;': '\u27fc',
- 'xnis;': '\u22fb',
- 'xodot;': '\u2a00',
- 'Xopf;': '\U0001d54f',
- 'xopf;': '\U0001d569',
- 'xoplus;': '\u2a01',
- 'xotime;': '\u2a02',
- 'xrArr;': '\u27f9',
- 'xrarr;': '\u27f6',
- 'Xscr;': '\U0001d4b3',
- 'xscr;': '\U0001d4cd',
- 'xsqcup;': '\u2a06',
- 'xuplus;': '\u2a04',
- 'xutri;': '\u25b3',
- 'xvee;': '\u22c1',
- 'xwedge;': '\u22c0',
- 'Yacute': '\xdd',
- 'yacute': '\xfd',
- 'Yacute;': '\xdd',
- 'yacute;': '\xfd',
- 'YAcy;': '\u042f',
- 'yacy;': '\u044f',
- 'Ycirc;': '\u0176',
- 'ycirc;': '\u0177',
- 'Ycy;': '\u042b',
- 'ycy;': '\u044b',
- 'yen': '\xa5',
- 'yen;': '\xa5',
- 'Yfr;': '\U0001d51c',
- 'yfr;': '\U0001d536',
- 'YIcy;': '\u0407',
- 'yicy;': '\u0457',
- 'Yopf;': '\U0001d550',
- 'yopf;': '\U0001d56a',
- 'Yscr;': '\U0001d4b4',
- 'yscr;': '\U0001d4ce',
- 'YUcy;': '\u042e',
- 'yucy;': '\u044e',
- 'yuml': '\xff',
- 'Yuml;': '\u0178',
- 'yuml;': '\xff',
- 'Zacute;': '\u0179',
- 'zacute;': '\u017a',
- 'Zcaron;': '\u017d',
- 'zcaron;': '\u017e',
- 'Zcy;': '\u0417',
- 'zcy;': '\u0437',
- 'Zdot;': '\u017b',
- 'zdot;': '\u017c',
- 'zeetrf;': '\u2128',
- 'ZeroWidthSpace;': '\u200b',
- 'Zeta;': '\u0396',
- 'zeta;': '\u03b6',
- 'Zfr;': '\u2128',
- 'zfr;': '\U0001d537',
- 'ZHcy;': '\u0416',
- 'zhcy;': '\u0436',
- 'zigrarr;': '\u21dd',
- 'Zopf;': '\u2124',
- 'zopf;': '\U0001d56b',
- 'Zscr;': '\U0001d4b5',
- 'zscr;': '\U0001d4cf',
- 'zwj;': '\u200d',
- 'zwnj;': '\u200c',
- }
-
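# Illustrative usage (not part of the original diff): the table above backs
# HTML5 entity decoding; keys keep the trailing semicolon where the spec
# requires one.
print(compat_html_entities_html5['lsquo;'])  # '\u2018' (left single quote)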
-try:
- import http.client as compat_http_client
-except ImportError: # Python 2
- import httplib as compat_http_client
-
-try:
- from urllib.error import HTTPError as compat_HTTPError
-except ImportError: # Python 2
- from urllib2 import HTTPError as compat_HTTPError
-
-try:
- from urllib.request import urlretrieve as compat_urlretrieve
-except ImportError: # Python 2
- from urllib import urlretrieve as compat_urlretrieve
-
-try:
- from html.parser import HTMLParser as compat_HTMLParser
-except ImportError: # Python 2
- from HTMLParser import HTMLParser as compat_HTMLParser
-
-try: # Python 2
- from HTMLParser import HTMLParseError as compat_HTMLParseError
-except ImportError: # Python 3
- try:
- from html.parser import HTMLParseError as compat_HTMLParseError
- except ImportError: # Python >=3.5
-
- # HTMLParseError was deprecated in Python 3.3 and removed in
- # Python 3.5. Introduce a dummy exception for Python >=3.5 to keep
- # cross-version exception handling compatible and uniform
- class compat_HTMLParseError(Exception):
- pass
-
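# Illustrative sketch (not part of the original diff): how a caller can lean on
# compat_HTMLParseError for uniform error handling across Python versions.
# _FirstTitleParser and extract_title are hypothetical helpers for this example.
from youtube_dl.compat import compat_HTMLParseError, compat_HTMLParser

class _FirstTitleParser(compat_HTMLParser):
    def __init__(self):
        compat_HTMLParser.__init__(self)
        self.in_title = False
        self.title = ''

    def handle_starttag(self, tag, attrs):
        if tag == 'title':
            self.in_title = True

    def handle_endtag(self, tag):
        if tag == 'title':
            self.in_title = False

    def handle_data(self, data):
        if self.in_title:
            self.title += data

def extract_title(html):
    parser = _FirstTitleParser()
    try:
        parser.feed(html)
        parser.close()
    except compat_HTMLParseError:
        # Unreachable on Python >= 3.5 (dummy exception), but this keeps the
        # handler valid on every supported version.
        return None
    return parser.title or None

print(extract_title('<html><head><title>Demo</title></head></html>'))  # Demo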
-try:
- from subprocess import DEVNULL
- compat_subprocess_get_DEVNULL = lambda: DEVNULL
-except ImportError:
- compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
-
-try:
- import http.server as compat_http_server
-except ImportError:
- import BaseHTTPServer as compat_http_server
-
-try:
- compat_str = unicode # Python 2
-except NameError:
- compat_str = str
-
-try:
- from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
- from urllib.parse import unquote as compat_urllib_parse_unquote
- from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
-except ImportError: # Python 2
- _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
- else re.compile(r'([\x00-\x7f]+)'))
-
- # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
- # implementations from cpython 3.4.3's stdlib. Python 2's version
- # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
-
- def compat_urllib_parse_unquote_to_bytes(string):
- """unquote_to_bytes('abc%20def') -> b'abc def'."""
- # Note: strings are encoded as UTF-8. This is only an issue if the string
- # contains unescaped non-ASCII characters, which URIs should not.
- if not string:
- # Is it a string-like object?
- string.split
- return b''
- if isinstance(string, compat_str):
- string = string.encode('utf-8')
- bits = string.split(b'%')
- if len(bits) == 1:
- return string
- res = [bits[0]]
- append = res.append
- for item in bits[1:]:
- try:
- append(compat_urllib_parse._hextochr[item[:2]])
- append(item[2:])
- except KeyError:
- append(b'%')
- append(item)
- return b''.join(res)
-
- def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
- """Replace %xx escapes by their single-character equivalent. The optional
- encoding and errors parameters specify how to decode percent-encoded
- sequences into Unicode characters, as accepted by the bytes.decode()
- method.
- By default, percent-encoded sequences are decoded with UTF-8, and invalid
- sequences are replaced by a placeholder character.
-
- unquote('abc%20def') -> 'abc def'.
- """
- if '%' not in string:
- string.split
- return string
- if encoding is None:
- encoding = 'utf-8'
- if errors is None:
- errors = 'replace'
- bits = _asciire.split(string)
- res = [bits[0]]
- append = res.append
- for i in range(1, len(bits), 2):
- append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
- append(bits[i + 1])
- return ''.join(res)
-
- def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
- """Like unquote(), but also replace plus signs by spaces, as required for
- unquoting HTML form values.
-
- unquote_plus('%7e/abc+def') -> '~/abc def'
- """
- string = string.replace('+', ' ')
- return compat_urllib_parse_unquote(string, encoding, errors)
-
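# Quick illustrative checks (not part of the original diff): the backported
# helpers behave like their Python 3.4 originals on both major versions.
assert compat_urllib_parse_unquote('abc%20def') == 'abc def'
assert compat_urllib_parse_unquote_plus('%7e/abc+def') == '~/abc def'
assert compat_urllib_parse_unquote_to_bytes('abc%20def') == b'abc def'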
-try:
- from urllib.parse import urlencode as compat_urllib_parse_urlencode
-except ImportError: # Python 2
- # Python 2 will choke in urlencode on a mixture of byte and unicode strings.
- # Possible solutions are to either port it from Python 3 along with all its
- # helpers or to manually ensure the input query contains only byte strings.
- # We stick with the latter, thus recursively encoding the whole query.
- def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
- def encode_elem(e):
- if isinstance(e, dict):
- e = encode_dict(e)
- elif isinstance(e, (list, tuple,)):
- list_e = encode_list(e)
- e = tuple(list_e) if isinstance(e, tuple) else list_e
- elif isinstance(e, compat_str):
- e = e.encode(encoding)
- return e
-
- def encode_dict(d):
- return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
-
- def encode_list(l):
- return [encode_elem(e) for e in l]
-
- return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
-
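# Illustrative usage (not part of the original diff): on Python 2 the wrapper
# first encodes every text element to bytes, so mixed byte/unicode queries no
# longer make urlencode raise UnicodeEncodeError.
print(compat_urllib_parse_urlencode({'q': u'\u4e2d\u6587', 'page': '1'}))
# e.g. 'q=%E4%B8%AD%E6%96%87&page=1' (pair order depends on the dict)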
-try:
- from urllib.request import DataHandler as compat_urllib_request_DataHandler
-except ImportError: # Python < 3.4
- # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
- class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
- def data_open(self, req):
- # data URLs as specified in RFC 2397.
- #
- # ignores POSTed data
- #
- # syntax:
- # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
- # mediatype := [ type "/" subtype ] *( ";" parameter )
- # data := *urlchar
- # parameter := attribute "=" value
- url = req.get_full_url()
-
- scheme, data = url.split(':', 1)
- mediatype, data = data.split(',', 1)
-
- # Even base64-encoded data URLs might be percent-quoted, so unquote in any case:
- data = compat_urllib_parse_unquote_to_bytes(data)
- if mediatype.endswith(';base64'):
- data = binascii.a2b_base64(data)
- mediatype = mediatype[:-7]
-
- if not mediatype:
- mediatype = 'text/plain;charset=US-ASCII'
-
- headers = email.message_from_string(
- 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
-
- return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
-
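# Illustrative sketch (not part of the original diff): wiring the handler into
# an opener; on Python >= 3.4 the stdlib DataHandler is used instead.
opener = compat_urllib_request.build_opener(compat_urllib_request_DataHandler())
response = opener.open('data:text/plain;base64,aGVsbG8=')
print(response.read())  # b'hello' (str 'hello' on Python 2)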
-try:
- compat_basestring = basestring # Python 2
-except NameError:
- compat_basestring = str
-
-try:
- compat_chr = unichr # Python 2
-except NameError:
- compat_chr = chr
-
-try:
- from xml.etree.ElementTree import ParseError as compat_xml_parse_error
-except ImportError: # Python 2.6
- from xml.parsers.expat import ExpatError as compat_xml_parse_error
-
-
-etree = xml.etree.ElementTree
-
-
-class _TreeBuilder(etree.TreeBuilder):
- def doctype(self, name, pubid, system):
- pass
-
-
-try:
- # xml.etree.ElementTree.Element is a factory function in Python <=2.6 and
- # the following will crash with:
- # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
- isinstance(None, xml.etree.ElementTree.Element)
- from xml.etree.ElementTree import Element as compat_etree_Element
-except TypeError: # Python <=2.6
- from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
-
-if sys.version_info[0] >= 3:
- def compat_etree_fromstring(text):
- return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
-else:
- # python 2.x tries to encode unicode strings with ascii (see the
- # XMLParser._fixtext method)
- try:
- _etree_iter = etree.Element.iter
- except AttributeError: # Python <=2.6
- def _etree_iter(root):
- for el in root.findall('*'):
- yield el
- for sub in _etree_iter(el):
- yield sub
-
- # on 2.6 XML doesn't have a parser argument, function copied from CPython
- # 2.7 source
- def _XML(text, parser=None):
- if not parser:
- parser = etree.XMLParser(target=_TreeBuilder())
- parser.feed(text)
- return parser.close()
-
- def _element_factory(*args, **kwargs):
- el = etree.Element(*args, **kwargs)
- for k, v in el.items():
- if isinstance(v, bytes):
- el.set(k, v.decode('utf-8'))
- return el
-
- def compat_etree_fromstring(text):
- doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
- for el in _etree_iter(doc):
- if el.text is not None and isinstance(el.text, bytes):
- el.text = el.text.decode('utf-8')
- return doc
-
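# Illustrative check (not part of the original diff): compat_etree_fromstring
# parses byte strings on both Python 2 and 3 and normalizes element text and
# attribute values to unicode on Python 2.
doc = compat_etree_fromstring(b'<root foo="bar"><a>spam</a></root>')
assert doc.attrib['foo'] == 'bar' and doc.find('a').text == 'spam'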
-if hasattr(etree, 'register_namespace'):
- compat_etree_register_namespace = etree.register_namespace
-else:
- def compat_etree_register_namespace(prefix, uri):
- """Register a namespace prefix.
- The registry is global, and any existing mapping for either the
- given prefix or the namespace URI will be removed.
- *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
- attributes in this namespace will be serialized with prefix if possible.
- ValueError is raised if prefix is reserved or is invalid.
- """
- if re.match(r"ns\d+$", prefix):
- raise ValueError("Prefix format reserved for internal use")
- for k, v in list(etree._namespace_map.items()):
- if k == uri or v == prefix:
- del etree._namespace_map[k]
- etree._namespace_map[uri] = prefix
-
-if sys.version_info < (2, 7):
- # Here comes the crazy part: in 2.6, if the xpath is a unicode string,
- # .//node does not match if a node is a direct child of . !
- def compat_xpath(xpath):
- if isinstance(xpath, compat_str):
- xpath = xpath.encode('ascii')
- return xpath
-else:
- compat_xpath = lambda xpath: xpath
-
-try:
- from urllib.parse import parse_qs as compat_parse_qs
-except ImportError: # Python 2
- # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
- # Python 2's version is apparently totally broken
-
- def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- qs, _coerce_result = qs, compat_str
- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
- r = []
- for name_value in pairs:
- if not name_value and not strict_parsing:
- continue
- nv = name_value.split('=', 1)
- if len(nv) != 2:
- if strict_parsing:
- raise ValueError('bad query field: %r' % (name_value,))
- # Handle case of a control-name with no equal sign
- if keep_blank_values:
- nv.append('')
- else:
- continue
- if len(nv[1]) or keep_blank_values:
- name = nv[0].replace('+', ' ')
- name = compat_urllib_parse_unquote(
- name, encoding=encoding, errors=errors)
- name = _coerce_result(name)
- value = nv[1].replace('+', ' ')
- value = compat_urllib_parse_unquote(
- value, encoding=encoding, errors=errors)
- value = _coerce_result(value)
- r.append((name, value))
- return r
-
- def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace'):
- parsed_result = {}
- pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
- encoding=encoding, errors=errors)
- for name, value in pairs:
- if name in parsed_result:
- parsed_result[name].append(value)
- else:
- parsed_result[name] = [value]
- return parsed_result
-
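# Illustrative usage (not part of the original diff): the backport mirrors
# Python 3's parse_qs, collecting repeated keys into lists.
print(compat_parse_qs('v=abc&v=def&t=10s'))
# {'v': ['abc', 'def'], 't': ['10s']}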
-
-compat_os_name = os._name if os.name == 'java' else os.name
-
-
-if compat_os_name == 'nt':
- def compat_shlex_quote(s):
- return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
-else:
- try:
- from shlex import quote as compat_shlex_quote
- except ImportError: # Python < 3.3
- def compat_shlex_quote(s):
- if re.match(r'^[-_\w./]+$', s):
- return s
- else:
- return "'" + s.replace("'", "'\"'\"'") + "'"
-
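# Illustrative usage (not part of the original diff): quoting is
# platform-aware, so the same call is safe in shell command lines everywhere.
print(compat_shlex_quote("it's here"))
# POSIX: 'it'"'"'s here'    Windows: "it's here"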
-
-try:
- args = shlex.split('中文')
- assert (isinstance(args, list)
- and isinstance(args[0], compat_str)
- and args[0] == '中文')
- compat_shlex_split = shlex.split
-except (AssertionError, UnicodeEncodeError):
- # Working around shlex issue with unicode strings on some python 2
- # versions (see http://bugs.python.org/issue1548891)
- def compat_shlex_split(s, comments=False, posix=True):
- if isinstance(s, compat_str):
- s = s.encode('utf-8')
- return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
-
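# Illustrative check (not part of the original diff): the wrapper round-trips
# through UTF-8, so non-ASCII arguments split correctly on affected Python 2
# builds as well.
assert compat_shlex_split(u'ffmpeg -i \u4e2d\u6587.mp4') == ['ffmpeg', '-i', u'\u4e2d\u6587.mp4']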
-
-def compat_ord(c):
- if type(c) is int:
- return c
- else:
- return ord(c)
-
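# Illustrative check (not part of the original diff): indexing a byte string
# yields str on Python 2 but int on Python 3; compat_ord normalizes both.
assert compat_ord(b'\x80'[0]) == 128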
-
-if sys.version_info >= (3, 0):
- compat_getenv = os.getenv
- compat_expanduser = os.path.expanduser
-
- def compat_setenv(key, value, env=os.environ):
- env[key] = value
-else:
- # Environment variables should be decoded with filesystem encoding.
- # Otherwise it will fail if any non-ASCII characters are present (see #3854 #3217 #2918)
-
- def compat_getenv(key, default=None):
- from .utils import get_filesystem_encoding
- env = os.getenv(key, default)
- if env:
- env = env.decode(get_filesystem_encoding())
- return env
-
- def compat_setenv(key, value, env=os.environ):
- def encode(v):
- from .utils import get_filesystem_encoding
- return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
- env[encode(key)] = encode(value)
-
- # HACK: The default implementations of os.path.expanduser from cpython do not decode
- # environment variables with filesystem encoding. We will work around this by
- # providing adjusted implementations.
- # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
- # for different platforms with correct environment variable decoding.
-
- if compat_os_name == 'posix':
- def compat_expanduser(path):
- """Expand ~ and ~user constructions. If user or $HOME is unknown,
- do nothing."""
- if not path.startswith('~'):
- return path
- i = path.find('/', 1)
- if i < 0:
- i = len(path)
- if i == 1:
- if 'HOME' not in os.environ:
- import pwd
- userhome = pwd.getpwuid(os.getuid()).pw_dir
- else:
- userhome = compat_getenv('HOME')
- else:
- import pwd
- try:
- pwent = pwd.getpwnam(path[1:i])
- except KeyError:
- return path
- userhome = pwent.pw_dir
- userhome = userhome.rstrip('/')
- return (userhome + path[i:]) or '/'
- elif compat_os_name in ('nt', 'ce'):
- def compat_expanduser(path):
- """Expand ~ and ~user constructs.
-
- If user or $HOME is unknown, do nothing."""
- if path[:1] != '~':
- return path
- i, n = 1, len(path)
- while i < n and path[i] not in '/\\':
- i = i + 1
-
- if 'HOME' in os.environ:
- userhome = compat_getenv('HOME')
- elif 'USERPROFILE' in os.environ:
- userhome = compat_getenv('USERPROFILE')
- elif 'HOMEPATH' not in os.environ:
- return path
- else:
- try:
- drive = compat_getenv('HOMEDRIVE')
- except KeyError:
- drive = ''
- userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
-
- if i != 1: # ~user
- userhome = os.path.join(os.path.dirname(userhome), path[1:i])
-
- return userhome + path[i:]
- else:
- compat_expanduser = os.path.expanduser
-
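# Illustrative usage (not part of the original diff, POSIX example): same
# behaviour as os.path.expanduser, but $HOME is decoded with the filesystem
# encoding on Python 2, so non-ASCII home directories survive intact.
compat_setenv('HOME', u'/home/\u00e1lvaro')
assert compat_expanduser(u'~/videos') == u'/home/\u00e1lvaro/videos'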
-
-if sys.version_info < (3, 0):
- def compat_print(s):
- from .utils import preferredencoding
- print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
-else:
- def compat_print(s):
- assert isinstance(s, compat_str)
- print(s)
-
-
-if sys.version_info < (3, 0) and sys.platform == 'win32':
- def compat_getpass(prompt, *args, **kwargs):
- if isinstance(prompt, compat_str):
- from .utils import preferredencoding
- prompt = prompt.encode(preferredencoding())
- return getpass.getpass(prompt, *args, **kwargs)
-else:
- compat_getpass = getpass.getpass
-
-try:
- compat_input = raw_input
-except NameError: # Python 3
- compat_input = input
-
-# Python < 2.6.5 requires kwargs to be bytes
-try:
- def _testfunc(x):
- pass
- _testfunc(**{'x': 0})
-except TypeError:
- def compat_kwargs(kwargs):
- return dict((bytes(k), v) for k, v in kwargs.items())
-else:
- compat_kwargs = lambda kwargs: kwargs
-
-
-try:
- compat_numeric_types = (int, float, long, complex)
-except NameError: # Python 3
- compat_numeric_types = (int, float, complex)
-
-
-try:
- compat_integer_types = (int, long)
-except NameError: # Python 3
- compat_integer_types = (int, )
-
-
-if sys.version_info < (2, 7):
- def compat_socket_create_connection(address, timeout, source_address=None):
- host, port = address
- err = None
- for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
- af, socktype, proto, canonname, sa = res
- sock = None
- try:
- sock = socket.socket(af, socktype, proto)
- sock.settimeout(timeout)
- if source_address:
- sock.bind(source_address)
- sock.connect(sa)
- return sock
- except socket.error as _:
- err = _
- if sock is not None:
- sock.close()
- if err is not None:
- raise err
- else:
- raise socket.error('getaddrinfo returns an empty list')
-else:
- compat_socket_create_connection = socket.create_connection
-
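# Illustrative usage (not part of the original diff, needs network access):
# identical call on every supported version; the backport only adds the
# Python 2.6 fallback.
sock = compat_socket_create_connection(('example.com', 80), timeout=10)
sock.close()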
-
-# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
-# See http://bugs.python.org/issue9161 for what is broken
-def workaround_optparse_bug9161():
- op = optparse.OptionParser()
- og = optparse.OptionGroup(op, 'foo')
- try:
- og.add_option('-t')
- except TypeError:
- real_add_option = optparse.OptionGroup.add_option
-
- def _compat_add_option(self, *args, **kwargs):
- enc = lambda v: (
- v.encode('ascii', 'replace') if isinstance(v, compat_str)
- else v)
- bargs = [enc(a) for a in args]
- bkwargs = dict(
- (k, enc(v)) for k, v in kwargs.items())
- return real_add_option(self, *bargs, **bkwargs)
- optparse.OptionGroup.add_option = _compat_add_option
-
-
-if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
- compat_get_terminal_size = shutil.get_terminal_size
-else:
- _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
-
- def compat_get_terminal_size(fallback=(80, 24)):
- columns = compat_getenv('COLUMNS')
- if columns:
- columns = int(columns)
- else:
- columns = None
- lines = compat_getenv('LINES')
- if lines:
- lines = int(lines)
- else:
- lines = None
-
- if columns is None or lines is None or columns <= 0 or lines <= 0:
- try:
- sp = subprocess.Popen(
- ['stty', 'size'],
- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = sp.communicate()
- _lines, _columns = map(int, out.split())
- except Exception:
- _columns, _lines = _terminal_size(*fallback)
-
- if columns is None or columns <= 0:
- columns = _columns
- if lines is None or lines <= 0:
- lines = _lines
- return _terminal_size(columns, lines)
-
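# Illustrative usage (not part of the original diff): the fallback is only
# consulted when COLUMNS/LINES are unset and `stty size` fails.
size = compat_get_terminal_size(fallback=(80, 24))
print('%dx%d' % (size.columns, size.lines))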
-try:
- itertools.count(start=0, step=1)
- compat_itertools_count = itertools.count
-except TypeError: # Python 2.6
- def compat_itertools_count(start=0, step=1):
- n = start
- while True:
- yield n
- n += step
-
-if sys.version_info >= (3, 0):
- from tokenize import tokenize as compat_tokenize_tokenize
-else:
- from tokenize import generate_tokens as compat_tokenize_tokenize
-
-
-try:
- struct.pack('!I', 0)
-except TypeError:
- # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
- # See https://bugs.python.org/issue19099
- def compat_struct_pack(spec, *args):
- if isinstance(spec, compat_str):
- spec = spec.encode('ascii')
- return struct.pack(spec, *args)
-
- def compat_struct_unpack(spec, *args):
- if isinstance(spec, compat_str):
- spec = spec.encode('ascii')
- return struct.unpack(spec, *args)
-
- class compat_Struct(struct.Struct):
- def __init__(self, fmt):
- if isinstance(fmt, compat_str):
- fmt = fmt.encode('ascii')
- super(compat_Struct, self).__init__(fmt)
-else:
- compat_struct_pack = struct.pack
- compat_struct_unpack = struct.unpack
- if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
- class compat_Struct(struct.Struct):
- def unpack(self, string):
- if not isinstance(string, buffer): # noqa: F821
- string = buffer(string) # noqa: F821
- return super(compat_Struct, self).unpack(string)
- else:
- compat_Struct = struct.Struct
-
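# Illustrative checks (not part of the original diff): text format strings
# work on every supported interpreter; the shim encodes them where needed.
assert compat_struct_pack('!I', 1) == b'\x00\x00\x00\x01'
assert compat_struct_unpack('!I', b'\x00\x00\x00\x01') == (1,)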
-
-try:
- from future_builtins import zip as compat_zip
-except ImportError: # not 2.6+ or is 3.x
- try:
- from itertools import izip as compat_zip # < 2.5 or 3.x
- except ImportError:
- compat_zip = zip
-
-
-if sys.version_info < (3, 3):
- def compat_b64decode(s, *args, **kwargs):
- if isinstance(s, compat_str):
- s = s.encode('ascii')
- return base64.b64decode(s, *args, **kwargs)
-else:
- compat_b64decode = base64.b64decode
-
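# Illustrative check (not part of the original diff): the shim accepts text
# input on Python < 3.3, where base64.b64decode insists on bytes.
assert compat_b64decode('aGVsbG8=') == b'hello'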
-
-if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
- # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
- # names, see the original PyPy issue [1] and the youtube-dl one [2].
- # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
- # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
- def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
- real = ctypes.WINFUNCTYPE(*args, **kwargs)
-
- def resf(tpl, *args, **kwargs):
- funcname, dll = tpl
- return real((str(funcname), dll), *args, **kwargs)
-
- return resf
-else:
- def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
- return ctypes.WINFUNCTYPE(*args, **kwargs)
-
-
-__all__ = [
- 'compat_HTMLParseError',
- 'compat_HTMLParser',
- 'compat_HTTPError',
- 'compat_Struct',
- 'compat_b64decode',
- 'compat_basestring',
- 'compat_chr',
- 'compat_cookiejar',
- 'compat_cookies',
- 'compat_ctypes_WINFUNCTYPE',
- 'compat_etree_Element',
- 'compat_etree_fromstring',
- 'compat_etree_register_namespace',
- 'compat_expanduser',
- 'compat_get_terminal_size',
- 'compat_getenv',
- 'compat_getpass',
- 'compat_html_entities',
- 'compat_html_entities_html5',
- 'compat_http_client',
- 'compat_http_server',
- 'compat_input',
- 'compat_integer_types',
- 'compat_itertools_count',
- 'compat_kwargs',
- 'compat_numeric_types',
- 'compat_ord',
- 'compat_os_name',
- 'compat_parse_qs',
- 'compat_print',
- 'compat_setenv',
- 'compat_shlex_quote',
- 'compat_shlex_split',
- 'compat_socket_create_connection',
- 'compat_str',
- 'compat_struct_pack',
- 'compat_struct_unpack',
- 'compat_subprocess_get_DEVNULL',
- 'compat_tokenize_tokenize',
- 'compat_urllib_error',
- 'compat_urllib_parse',
- 'compat_urllib_parse_unquote',
- 'compat_urllib_parse_unquote_plus',
- 'compat_urllib_parse_unquote_to_bytes',
- 'compat_urllib_parse_urlencode',
- 'compat_urllib_parse_urlparse',
- 'compat_urllib_request',
- 'compat_urllib_request_DataHandler',
- 'compat_urllib_response',
- 'compat_urlparse',
- 'compat_urlretrieve',
- 'compat_xml_parse_error',
- 'compat_xpath',
- 'compat_zip',
- 'workaround_optparse_bug9161',
-]
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
deleted file mode 100644
index 2e485df9d..000000000
--- a/youtube_dl/downloader/__init__.py
+++ /dev/null
@@ -1,61 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import FileDownloader
-from .f4m import F4mFD
-from .hls import HlsFD
-from .http import HttpFD
-from .rtmp import RtmpFD
-from .dash import DashSegmentsFD
-from .rtsp import RtspFD
-from .ism import IsmFD
-from .external import (
- get_external_downloader,
- FFmpegFD,
-)
-
-from ..utils import (
- determine_protocol,
-)
-
-PROTOCOL_MAP = {
- 'rtmp': RtmpFD,
- 'm3u8_native': HlsFD,
- 'm3u8': FFmpegFD,
- 'mms': RtspFD,
- 'rtsp': RtspFD,
- 'f4m': F4mFD,
- 'http_dash_segments': DashSegmentsFD,
- 'ism': IsmFD,
-}
-
-
-def get_suitable_downloader(info_dict, params={}):
- """Get the downloader class that can handle the info dict."""
- protocol = determine_protocol(info_dict)
- info_dict['protocol'] = protocol
-
- # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
- # return FFmpegFD
-
- external_downloader = params.get('external_downloader')
- if external_downloader is not None:
- ed = get_external_downloader(external_downloader)
- if ed.can_download(info_dict):
- return ed
-
- if protocol.startswith('m3u8') and info_dict.get('is_live'):
- return FFmpegFD
-
- if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
- return HlsFD
-
- if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
- return FFmpegFD
-
- return PROTOCOL_MAP.get(protocol, HttpFD)
-
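# Illustrative usage (not part of the original diff): given an extracted info
# dict, the dispatcher resolves a downloader class by protocol while honouring
# params such as external_downloader and hls_prefer_native.
info = {'url': 'https://example.com/video.m3u8', 'protocol': 'm3u8_native'}
fd_cls = get_suitable_downloader(info, params={'hls_prefer_native': True})
# -> HlsFD, the native HLS downloader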
-
-__all__ = [
- 'get_suitable_downloader',
- 'FileDownloader',
-]
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
deleted file mode 100644
index 1cdba89cd..000000000
--- a/youtube_dl/downloader/common.py
+++ /dev/null
@@ -1,391 +0,0 @@
-from __future__ import division, unicode_literals
-
-import os
-import re
-import sys
-import time
-import random
-
-from ..compat import compat_os_name
-from ..utils import (
- decodeArgument,
- encodeFilename,
- error_to_compat_str,
- format_bytes,
- shell_quote,
- timeconvert,
-)
-
-
-class FileDownloader(object):
- """File Downloader class.
-
- File downloader objects are the ones responsible for downloading the
- actual video file and writing it to disk.
-
- File downloaders accept a lot of parameters. In order not to saturate
- the object constructor with arguments, it receives a dictionary of
- options instead.
-
- Available options:
-
- verbose: Print additional info to stdout.
- quiet: Do not print messages to stdout.
- ratelimit: Download speed limit, in bytes/sec.
- retries: Number of times to retry for HTTP error 5xx
- buffersize: Size of download buffer in bytes.
- noresizebuffer: Do not automatically resize the download buffer.
- continuedl: Try to continue downloads if possible.
- noprogress: Do not print the progress bar.
- logtostderr: Log messages to stderr instead of stdout.
- consoletitle: Display progress in console window's titlebar.
- nopart: Do not use temporary .part files.
- updatetime: Use the Last-modified header to set output file timestamps.
- test: Download only first bytes to test the downloader.
- min_filesize: Skip files smaller than this size.
- max_filesize: Skip files larger than this size.
- xattr_set_filesize: Set the ytdl.filesize user xattr to the expected size.
- external_downloader_args: A list of additional command-line arguments for the
- external downloader.
- hls_use_mpegts: Use the mpegts container for HLS videos.
- http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
- useful for bypassing bandwidth throttling imposed by
- a webserver (experimental)
-
- Subclasses of this one must re-define the real_download method.
- """
-
- _TEST_FILE_SIZE = 10241
- params = None
-
- def __init__(self, ydl, params):
- """Create a FileDownloader object with the given options."""
- self.ydl = ydl
- self._progress_hooks = []
- self.params = params
- self.add_progress_hook(self.report_progress)
-
- @staticmethod
- def format_seconds(seconds):
- (mins, secs) = divmod(seconds, 60)
- (hours, mins) = divmod(mins, 60)
- if hours > 99:
- return '--:--:--'
- if hours == 0:
- return '%02d:%02d' % (mins, secs)
- else:
- return '%02d:%02d:%02d' % (hours, mins, secs)
-
- @staticmethod
- def calc_percent(byte_counter, data_len):
- if data_len is None:
- return None
- return float(byte_counter) / float(data_len) * 100.0
-
- @staticmethod
- def format_percent(percent):
- if percent is None:
- return '---.-%'
- return '%6s' % ('%3.1f%%' % percent)
-
- @staticmethod
- def calc_eta(start, now, total, current):
- if total is None:
- return None
- if now is None:
- now = time.time()
- dif = now - start
- if current == 0 or dif < 0.001: # One millisecond
- return None
- rate = float(current) / dif
- return int((float(total) - float(current)) / rate)
-
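# Illustrative check (not part of the original diff): 25 of 100 bytes in 10
# seconds means a rate of 2.5 B/s, so 75 remaining bytes give an ETA of 30 s.
assert FileDownloader.calc_eta(start=0, now=10, total=100, current=25) == 30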
- @staticmethod
- def format_eta(eta):
- if eta is None:
- return '--:--'
- return FileDownloader.format_seconds(eta)
-
- @staticmethod
- def calc_speed(start, now, bytes):
- dif = now - start
- if bytes == 0 or dif < 0.001: # One millisecond
- return None
- return float(bytes) / dif
-
- @staticmethod
- def format_speed(speed):
- if speed is None:
- return '%10s' % '---b/s'
- return '%10s' % ('%s/s' % format_bytes(speed))
-
- @staticmethod
- def format_retries(retries):
- return 'inf' if retries == float('inf') else '%.0f' % retries
-
- @staticmethod
- def best_block_size(elapsed_time, bytes):
- new_min = max(bytes / 2.0, 1.0)
- new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
- if elapsed_time < 0.001:
- return int(new_max)
- rate = bytes / elapsed_time
- if rate > new_max:
- return int(new_max)
- if rate < new_min:
- return int(new_min)
- return int(rate)
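- # (Editorial example, not part of the original file: 1024 bytes read in
- # 0.5 s is a measured rate of 2048 B/s, which lies within
- # [new_min=512, new_max=2048], so the next block doubles in size.)
- # >>> FileDownloader.best_block_size(0.5, 1024)
- # 2048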
-
- @staticmethod
- def parse_bytes(bytestr):
- """Parse a string indicating a byte quantity into an integer."""
- matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
- if matchobj is None:
- return None
- number = float(matchobj.group(1))
- multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
- return int(round(number * multiplier))
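- # (Editorial examples, not part of the original file: multipliers are
- # 1024-based, keyed by the suffix's position in 'bkmgtpezy'.)
- # >>> FileDownloader.parse_bytes('500K')
- # 512000
- # >>> FileDownloader.parse_bytes('10.5M')
- # 11010048
- # >>> FileDownloader.parse_bytes('nonsense') is None
- # True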
-
- def to_screen(self, *args, **kargs):
- self.ydl.to_screen(*args, **kargs)
-
- def to_stderr(self, message):
- self.ydl.to_stderr(message)
-
- def to_console_title(self, message):
- self.ydl.to_console_title(message)
-
- def trouble(self, *args, **kargs):
- self.ydl.trouble(*args, **kargs)
-
- def report_warning(self, *args, **kargs):
- self.ydl.report_warning(*args, **kargs)
-
- def report_error(self, *args, **kargs):
- self.ydl.report_error(*args, **kargs)
-
- def slow_down(self, start_time, now, byte_counter):
- """Sleep if the download speed is over the rate limit."""
- rate_limit = self.params.get('ratelimit')
- if rate_limit is None or byte_counter == 0:
- return
- if now is None:
- now = time.time()
- elapsed = now - start_time
- if elapsed <= 0.0:
- return
- speed = float(byte_counter) / elapsed
- if speed > rate_limit:
- sleep_time = float(byte_counter) / rate_limit - elapsed
- if sleep_time > 0:
- time.sleep(sleep_time)
-
- def temp_name(self, filename):
- """Returns a temporary filename for the given filename."""
- if self.params.get('nopart', False) or filename == '-' or \
- (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
- return filename
- return filename + '.part'
-
- def undo_temp_name(self, filename):
- if filename.endswith('.part'):
- return filename[:-len('.part')]
- return filename
-
- def ytdl_filename(self, filename):
- return filename + '.ytdl'
-
- def try_rename(self, old_filename, new_filename):
- try:
- if old_filename == new_filename:
- return
- os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
- except (IOError, OSError) as err:
- self.report_error('unable to rename file: %s' % error_to_compat_str(err))
-
- def try_utime(self, filename, last_modified_hdr):
- """Try to set the last-modified time of the given file."""
- if last_modified_hdr is None:
- return
- if not os.path.isfile(encodeFilename(filename)):
- return
- filetime = timeconvert(last_modified_hdr)
- if filetime is None:
- return filetime
- # Ignore obviously invalid dates
- if filetime == 0:
- return
- try:
- os.utime(filename, (time.time(), filetime))
- except Exception:
- pass
- return filetime
-
- def report_destination(self, filename):
- """Report destination filename."""
- self.to_screen('[download] Destination: ' + filename)
-
- def _report_progress_status(self, msg, is_last_line=False):
- fullmsg = '[download] ' + msg
- if self.params.get('progress_with_newline', False):
- self.to_screen(fullmsg)
- else:
- if compat_os_name == 'nt':
- prev_len = getattr(self, '_report_progress_prev_line_length',
- 0)
- if prev_len > len(fullmsg):
- fullmsg += ' ' * (prev_len - len(fullmsg))
- self._report_progress_prev_line_length = len(fullmsg)
- clear_line = '\r'
- else:
- clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
- self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
- self.to_console_title('youtube-dl ' + msg)
-
- def report_progress(self, s):
- if s['status'] == 'finished':
- if self.params.get('noprogress', False):
- self.to_screen('[download] Download completed')
- else:
- msg_template = '100%%'
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template += ' of %(_total_bytes_str)s'
- if s.get('elapsed') is not None:
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template += ' in %(_elapsed_str)s'
- self._report_progress_status(
- msg_template % s, is_last_line=True)
-
- if self.params.get('noprogress'):
- return
-
- if s['status'] != 'downloading':
- return
-
- if s.get('eta') is not None:
- s['_eta_str'] = self.format_eta(s['eta'])
- else:
- s['_eta_str'] = 'Unknown ETA'
-
- if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
- elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
- s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
- else:
- if s.get('downloaded_bytes') == 0:
- s['_percent_str'] = self.format_percent(0)
- else:
- s['_percent_str'] = 'Unknown %'
-
- if s.get('speed') is not None:
- s['_speed_str'] = self.format_speed(s['speed'])
- else:
- s['_speed_str'] = 'Unknown speed'
-
- if s.get('total_bytes') is not None:
- s['_total_bytes_str'] = format_bytes(s['total_bytes'])
- msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
- elif s.get('total_bytes_estimate') is not None:
- s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
- msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
- else:
- if s.get('downloaded_bytes') is not None:
- s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
- if s.get('elapsed'):
- s['_elapsed_str'] = self.format_seconds(s['elapsed'])
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
- else:
- msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
- else:
- msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
-
- self._report_progress_status(msg_template % s)
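- # (Editorial example, not part of the original file: a progress hook
- # payload such as
- # s = {'status': 'downloading', 'downloaded_bytes': 2621440,
- #      'total_bytes': 10485760, 'eta': 12, 'speed': 655360.0}
- # renders roughly as
- # '[download]  25.0% of 10.00MiB at 640.00KiB/s ETA 00:12'.)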
-
- def report_resuming_byte(self, resume_len):
- """Report attempt to resume at given byte."""
- self.to_screen('[download] Resuming download at byte %s' % resume_len)
-
- def report_retry(self, err, count, retries):
- """Report retry in case of HTTP error 5xx"""
- self.to_screen(
- '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
- % (error_to_compat_str(err), count, self.format_retries(retries)))
-
- def report_file_already_downloaded(self, file_name):
- """Report file has already been fully downloaded."""
- try:
- self.to_screen('[download] %s has already been downloaded' % file_name)
- except UnicodeEncodeError:
- self.to_screen('[download] The file has already been downloaded')
-
- def report_unable_to_resume(self):
- """Report it was impossible to resume download."""
- self.to_screen('[download] Unable to resume')
-
- def download(self, filename, info_dict):
- """Download to a filename using the info from info_dict
- Return True on success and False otherwise
- """
-
- nooverwrites_and_exists = (
- self.params.get('nooverwrites', False)
- and os.path.exists(encodeFilename(filename))
- )
-
- if not hasattr(filename, 'write'):
- continuedl_and_exists = (
- self.params.get('continuedl', True)
- and os.path.isfile(encodeFilename(filename))
- and not self.params.get('nopart', False)
- )
-
- # Check file already present
- if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
- self.report_file_already_downloaded(filename)
- self._hook_progress({
- 'filename': filename,
- 'status': 'finished',
- 'total_bytes': os.path.getsize(encodeFilename(filename)),
- })
- return True
-
- min_sleep_interval = self.params.get('sleep_interval')
- if min_sleep_interval:
- max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
- sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
- self.to_screen(
- '[download] Sleeping %s seconds...' % (
- int(sleep_interval) if sleep_interval.is_integer()
- else '%.2f' % sleep_interval))
- time.sleep(sleep_interval)
-
- return self.real_download(filename, info_dict)
-
- def real_download(self, filename, info_dict):
- """Real download process. Redefine in subclasses."""
- raise NotImplementedError('This method must be implemented by subclasses')
-
- def _hook_progress(self, status):
- for ph in self._progress_hooks:
- ph(status)
-
- def add_progress_hook(self, ph):
- # See YoutubeDL.py (search for progress_hooks) for a description of
- # this interface
- self._progress_hooks.append(ph)
-
- def _debug_cmd(self, args, exe=None):
- if not self.params.get('verbose', False):
- return
-
- str_args = [decodeArgument(a) for a in args]
-
- if exe is None:
- exe = os.path.basename(str_args[0])
-
- self.to_screen('[debug] %s command line: %s' % (
- exe, shell_quote(str_args)))
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
deleted file mode 100644
index 02f35459e..000000000
--- a/youtube_dl/downloader/fragment.py
+++ /dev/null
@@ -1,269 +0,0 @@
-from __future__ import division, unicode_literals
-
-import os
-import time
-import json
-
-from .common import FileDownloader
-from .http import HttpFD
-from ..utils import (
- error_to_compat_str,
- encodeFilename,
- sanitize_open,
- sanitized_Request,
-)
-
-
-class HttpQuietDownloader(HttpFD):
- def to_screen(self, *args, **kargs):
- pass
-
-
-class FragmentFD(FileDownloader):
- """
- A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
-
- Available options:
-
- fragment_retries: Number of times to retry a fragment for HTTP error (DASH
- and hlsnative only)
- skip_unavailable_fragments:
- Skip unavailable fragments (DASH and hlsnative only)
- keep_fragments: Keep downloaded fragments on disk after downloading is
- finished
-
- For each incomplete fragment download, youtube-dl keeps on disk a special
- bookkeeping file with download state and metadata (in the future such
- files will be used for any incomplete download handled by youtube-dl).
- This file is used to properly handle resuming, check download file
- consistency and detect potential errors. The file has a .ytdl extension
- and is a standard JSON file of the following format:
-
- extractor:
- Dictionary of extractor-related data. TBD.
-
- downloader:
- Dictionary of downloader-related data. May contain the following data:
- current_fragment:
- Dictionary with current (being downloaded) fragment data:
- index: 0-based index of current fragment among all fragments
- fragment_count:
- Total count of fragments
-
- This feature is experimental and the file format may change in the future.
- """
-
- def report_retry_fragment(self, err, frag_index, count, retries):
- self.to_screen(
- '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
- % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
-
- def report_skip_fragment(self, frag_index):
- self.to_screen('[download] Skipping fragment %d...' % frag_index)
-
- def _prepare_url(self, info_dict, url):
- headers = info_dict.get('http_headers')
- return sanitized_Request(url, None, headers) if headers else url
-
- def _prepare_and_start_frag_download(self, ctx):
- self._prepare_frag_download(ctx)
- self._start_frag_download(ctx)
-
- @staticmethod
- def __do_ytdl_file(ctx):
- return not ctx['live'] and ctx['tmpfilename'] != '-'
-
- def _read_ytdl_file(self, ctx):
- assert 'ytdl_corrupt' not in ctx
- stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
- try:
- ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
- except Exception:
- ctx['ytdl_corrupt'] = True
- finally:
- stream.close()
-
- def _write_ytdl_file(self, ctx):
- frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
- downloader = {
- 'current_fragment': {
- 'index': ctx['fragment_index'],
- },
- }
- if ctx.get('fragment_count') is not None:
- downloader['fragment_count'] = ctx['fragment_count']
- frag_index_stream.write(json.dumps({'downloader': downloader}))
- frag_index_stream.close()
-
- def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
- fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
- success = ctx['dl'].download(fragment_filename, {
- 'url': frag_url,
- 'http_headers': headers or info_dict.get('http_headers'),
- })
- if not success:
- return False, None
- down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
- ctx['fragment_filename_sanitized'] = frag_sanitized
- frag_content = down.read()
- down.close()
- return True, frag_content
-
- def _append_fragment(self, ctx, frag_content):
- try:
- ctx['dest_stream'].write(frag_content)
- ctx['dest_stream'].flush()
- finally:
- if self.__do_ytdl_file(ctx):
- self._write_ytdl_file(ctx)
- if not self.params.get('keep_fragments', False):
- os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
- del ctx['fragment_filename_sanitized']
-
- def _prepare_frag_download(self, ctx):
- if 'live' not in ctx:
- ctx['live'] = False
- if not ctx['live']:
- total_frags_str = '%d' % ctx['total_frags']
- ad_frags = ctx.get('ad_frags', 0)
- if ad_frags:
- total_frags_str += ' (not including %d ad)' % ad_frags
- else:
- total_frags_str = 'unknown (live)'
- self.to_screen(
- '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
- self.report_destination(ctx['filename'])
- dl = HttpQuietDownloader(
- self.ydl,
- {
- 'continuedl': True,
- 'quiet': True,
- 'noprogress': True,
- 'ratelimit': self.params.get('ratelimit'),
- 'retries': self.params.get('retries', 0),
- 'nopart': self.params.get('nopart', False),
- 'test': self.params.get('test', False),
- }
- )
- tmpfilename = self.temp_name(ctx['filename'])
- open_mode = 'wb'
- resume_len = 0
-
- # Establish possible resume length
- if os.path.isfile(encodeFilename(tmpfilename)):
- open_mode = 'ab'
- resume_len = os.path.getsize(encodeFilename(tmpfilename))
-
- # Should be initialized before ytdl file check
- ctx.update({
- 'tmpfilename': tmpfilename,
- 'fragment_index': 0,
- })
-
- if self.__do_ytdl_file(ctx):
- if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
- self._read_ytdl_file(ctx)
- is_corrupt = ctx.get('ytdl_corrupt') is True
- is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
- if is_corrupt or is_inconsistent:
- message = (
- '.ytdl file is corrupt' if is_corrupt else
- 'Inconsistent state of incomplete fragment download')
- self.report_warning(
- '%s. Restarting from the beginning...' % message)
- ctx['fragment_index'] = resume_len = 0
- if 'ytdl_corrupt' in ctx:
- del ctx['ytdl_corrupt']
- self._write_ytdl_file(ctx)
- else:
- self._write_ytdl_file(ctx)
- assert ctx['fragment_index'] == 0
-
- dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
-
- ctx.update({
- 'dl': dl,
- 'dest_stream': dest_stream,
- 'tmpfilename': tmpfilename,
- # Total size in bytes of the complete fragments downloaded so far
- 'complete_frags_downloaded_bytes': resume_len,
- })
-
- def _start_frag_download(self, ctx):
- resume_len = ctx['complete_frags_downloaded_bytes']
- total_frags = ctx['total_frags']
- # This dict stores the download progress; it is updated by the
- # progress hook
- state = {
- 'status': 'downloading',
- 'downloaded_bytes': resume_len,
- 'fragment_index': ctx['fragment_index'],
- 'fragment_count': total_frags,
- 'filename': ctx['filename'],
- 'tmpfilename': ctx['tmpfilename'],
- }
-
- start = time.time()
- ctx.update({
- 'started': start,
- # Bytes of the current fragment downloaded as of the previous
- # frag progress hook invocation
- 'prev_frag_downloaded_bytes': 0,
- })
-
- def frag_progress_hook(s):
- if s['status'] not in ('downloading', 'finished'):
- return
-
- time_now = time.time()
- state['elapsed'] = time_now - start
- frag_total_bytes = s.get('total_bytes') or 0
- if not ctx['live']:
- estimated_size = (
- (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
- / (state['fragment_index'] + 1) * total_frags)
- state['total_bytes_estimate'] = estimated_size
-
- if s['status'] == 'finished':
- state['fragment_index'] += 1
- ctx['fragment_index'] = state['fragment_index']
- state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
- ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
- ctx['prev_frag_downloaded_bytes'] = 0
- else:
- frag_downloaded_bytes = s['downloaded_bytes']
- state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
- if not ctx['live']:
- state['eta'] = self.calc_eta(
- start, time_now, estimated_size - resume_len,
- state['downloaded_bytes'] - resume_len)
- state['speed'] = s.get('speed') or ctx.get('speed')
- ctx['speed'] = state['speed']
- ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
- self._hook_progress(state)
-
- ctx['dl'].add_progress_hook(frag_progress_hook)
-
- return start
-
- def _finish_frag_download(self, ctx):
- ctx['dest_stream'].close()
- if self.__do_ytdl_file(ctx):
- ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
- if os.path.isfile(ytdl_filename):
- os.remove(ytdl_filename)
- elapsed = time.time() - ctx['started']
-
- if ctx['tmpfilename'] == '-':
- downloaded_bytes = ctx['complete_frags_downloaded_bytes']
- else:
- self.try_rename(ctx['tmpfilename'], ctx['filename'])
- downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
-
- self._hook_progress({
- 'downloaded_bytes': downloaded_bytes,
- 'total_bytes': downloaded_bytes,
- 'filename': ctx['filename'],
- 'status': 'finished',
- 'elapsed': elapsed,
- })
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
deleted file mode 100644
index b59aad73f..000000000
--- a/youtube_dl/downloader/hls.py
+++ /dev/null
@@ -1,210 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import binascii
-try:
- from Crypto.Cipher import AES
- can_decrypt_frag = True
-except ImportError:
- can_decrypt_frag = False
-
-from .fragment import FragmentFD
-from .external import FFmpegFD
-
-from ..compat import (
- compat_urllib_error,
- compat_urlparse,
- compat_struct_pack,
-)
-from ..utils import (
- parse_m3u8_attributes,
- update_url_query,
-)
-
-
-class HlsFD(FragmentFD):
- """ A limited implementation that does not require ffmpeg """
-
- FD_NAME = 'hlsnative'
-
- @staticmethod
- def can_download(manifest, info_dict):
- UNSUPPORTED_FEATURES = (
- r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
- # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
-
- # The live-streams heuristic does not always work (e.g. this stream is geo-restricted to Germany:
- # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
- # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
-
- # This heuristic is also incorrect, since segments may equally well not be appended.
- # Twitch VODs of finished streams have EXT-X-PLAYLIST-TYPE:EVENT even though
- # no segments will ever be appended to the end of the playlist.
- # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
- # # event media playlists [4]
-
- # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
- # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
- # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
- # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
- )
- check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
- is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
- check_results.append(can_decrypt_frag or not is_aes128_enc)
- check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
- check_results.append(not info_dict.get('is_live'))
- return all(check_results)
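- # (Editorial examples, not part of the original file.)
- # >>> vod = '#EXTM3U\n#EXTINF:9.0,\nseg0.ts\n#EXT-X-ENDLIST'
- # >>> HlsFD.can_download(vod, {})
- # True
- # >>> HlsFD.can_download('#EXT-X-KEY:METHOD=SAMPLE-AES\n' + vod, {})
- # False
- # An unsupported manifest is delegated to ffmpeg by real_download below.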
-
- def real_download(self, filename, info_dict):
- man_url = info_dict['url']
- self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
-
- urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
- man_url = urlh.geturl()
- s = urlh.read().decode('utf-8', 'ignore')
-
- if not self.can_download(s, info_dict):
- if info_dict.get('extra_param_to_segment_url'):
- self.report_error('pycrypto not found. Please install it.')
- return False
- self.report_warning(
- 'hlsnative has detected features it does not support, '
- 'extraction will be delegated to ffmpeg')
- fd = FFmpegFD(self.ydl, self.params)
- for ph in self._progress_hooks:
- fd.add_progress_hook(ph)
- return fd.real_download(filename, info_dict)
-
- def is_ad_fragment_start(s):
- return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
- or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
-
- def is_ad_fragment_end(s):
- return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
- or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
-
- media_frags = 0
- ad_frags = 0
- ad_frag_next = False
- for line in s.splitlines():
- line = line.strip()
- if not line:
- continue
- if line.startswith('#'):
- if is_ad_fragment_start(line):
- ad_frag_next = True
- elif is_ad_fragment_end(line):
- ad_frag_next = False
- continue
- if ad_frag_next:
- ad_frags += 1
- continue
- media_frags += 1
-
- ctx = {
- 'filename': filename,
- 'total_frags': media_frags,
- 'ad_frags': ad_frags,
- }
-
- self._prepare_and_start_frag_download(ctx)
-
- fragment_retries = self.params.get('fragment_retries', 0)
- skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
- test = self.params.get('test', False)
-
- extra_query = None
- extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
- if extra_param_to_segment_url:
- extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
- i = 0
- media_sequence = 0
- decrypt_info = {'METHOD': 'NONE'}
- byte_range = {}
- frag_index = 0
- ad_frag_next = False
- for line in s.splitlines():
- line = line.strip()
- if line:
- if not line.startswith('#'):
- if ad_frag_next:
- continue
- frag_index += 1
- if frag_index <= ctx['fragment_index']:
- continue
- frag_url = (
- line
- if re.match(r'^https?://', line)
- else compat_urlparse.urljoin(man_url, line))
- if extra_query:
- frag_url = update_url_query(frag_url, extra_query)
- count = 0
- headers = info_dict.get('http_headers', {})
- if byte_range:
- headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'])
- while count <= fragment_retries:
- try:
- success, frag_content = self._download_fragment(
- ctx, frag_url, info_dict, headers)
- if not success:
- return False
- break
- except compat_urllib_error.HTTPError as err:
- # Unavailable (possibly temporary) fragments may be served.
- # First we retry, then either skip or abort.
- # See https://github.com/ytdl-org/youtube-dl/issues/10165 and
- # https://github.com/ytdl-org/youtube-dl/issues/10448.
- count += 1
- if count <= fragment_retries:
- self.report_retry_fragment(err, frag_index, count, fragment_retries)
- if count > fragment_retries:
- if skip_unavailable_fragments:
- i += 1
- media_sequence += 1
- self.report_skip_fragment(frag_index)
- continue
- self.report_error(
- 'giving up after %s fragment retries' % fragment_retries)
- return False
- if decrypt_info['METHOD'] == 'AES-128':
- iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
- decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
- self._prepare_url(info_dict, decrypt_info['URI'])).read()
- frag_content = AES.new(
- decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
- self._append_fragment(ctx, frag_content)
- # We only download the first fragment during the test
- if test:
- break
- i += 1
- media_sequence += 1
- elif line.startswith('#EXT-X-KEY'):
- decrypt_url = decrypt_info.get('URI')
- decrypt_info = parse_m3u8_attributes(line[11:])
- if decrypt_info['METHOD'] == 'AES-128':
- if 'IV' in decrypt_info:
- decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
- if not re.match(r'^https?://', decrypt_info['URI']):
- decrypt_info['URI'] = compat_urlparse.urljoin(
- man_url, decrypt_info['URI'])
- if extra_query:
- decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
- if decrypt_url != decrypt_info['URI']:
- decrypt_info['KEY'] = None
- elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
- media_sequence = int(line[22:])
- elif line.startswith('#EXT-X-BYTERANGE'):
- splitted_byte_range = line[17:].split('@')
- sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
- byte_range = {
- 'start': sub_range_start,
- 'end': sub_range_start + int(splitted_byte_range[0]),
- }
- elif is_ad_fragment_start(line):
- ad_frag_next = True
- elif is_ad_fragment_end(line):
- ad_frag_next = False
-
- self._finish_frag_download(ctx)
-
- return True
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
deleted file mode 100644
index 3c72ea18b..000000000
--- a/youtube_dl/downloader/http.py
+++ /dev/null
@@ -1,354 +0,0 @@
-from __future__ import unicode_literals
-
-import errno
-import os
-import socket
-import time
-import random
-import re
-
-from .common import FileDownloader
-from ..compat import (
- compat_str,
- compat_urllib_error,
-)
-from ..utils import (
- ContentTooShortError,
- encodeFilename,
- int_or_none,
- sanitize_open,
- sanitized_Request,
- write_xattr,
- XAttrMetadataError,
- XAttrUnavailableError,
-)
-
-
-class HttpFD(FileDownloader):
- def real_download(self, filename, info_dict):
- url = info_dict['url']
-
- class DownloadContext(dict):
- __getattr__ = dict.get
- __setattr__ = dict.__setitem__
- __delattr__ = dict.__delitem__
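- # (Editorial note, not part of the original file: DownloadContext lets
- # the nested closures below share mutable state via attribute access,
- # e.g. ctx.resume_len = 0 is the same as ctx['resume_len'] = 0, and a
- # missing attribute reads as None because __getattr__ is dict.get.)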
-
- ctx = DownloadContext()
- ctx.filename = filename
- ctx.tmpfilename = self.temp_name(filename)
- ctx.stream = None
-
- # Do not include the Accept-Encoding header
- headers = {'Youtubedl-no-compression': 'True'}
- add_headers = info_dict.get('http_headers')
- if add_headers:
- headers.update(add_headers)
-
- is_test = self.params.get('test', False)
- chunk_size = self._TEST_FILE_SIZE if is_test else (
- info_dict.get('downloader_options', {}).get('http_chunk_size')
- or self.params.get('http_chunk_size') or 0)
-
- ctx.open_mode = 'wb'
- ctx.resume_len = 0
- ctx.data_len = None
- ctx.block_size = self.params.get('buffersize', 1024)
- ctx.start_time = time.time()
- ctx.chunk_size = None
-
- if self.params.get('continuedl', True):
- # Establish possible resume length
- if os.path.isfile(encodeFilename(ctx.tmpfilename)):
- ctx.resume_len = os.path.getsize(
- encodeFilename(ctx.tmpfilename))
-
- ctx.is_resume = ctx.resume_len > 0
-
- count = 0
- retries = self.params.get('retries', 0)
-
- class SucceedDownload(Exception):
- pass
-
- class RetryDownload(Exception):
- def __init__(self, source_error):
- self.source_error = source_error
-
- class NextFragment(Exception):
- pass
-
- def set_range(req, start, end):
- range_header = 'bytes=%d-' % start
- if end:
- range_header += compat_str(end)
- req.add_header('Range', range_header)
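- # (Editorial example, not part of the original file:
- # set_range(req, 1024, 2047) adds 'Range: bytes=1024-2047', while
- # set_range(req, 1024, None) adds the open-ended 'Range: bytes=1024-'
- # used for plain resumes.)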
-
- def establish_connection():
- ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
- if not is_test and chunk_size else chunk_size)
- if ctx.resume_len > 0:
- range_start = ctx.resume_len
- if ctx.is_resume:
- self.report_resuming_byte(ctx.resume_len)
- ctx.open_mode = 'ab'
- elif ctx.chunk_size > 0:
- range_start = 0
- else:
- range_start = None
- ctx.is_resume = False
- range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
- if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
- range_end = ctx.data_len - 1
- has_range = range_start is not None
- ctx.has_range = has_range
- request = sanitized_Request(url, None, headers)
- if has_range:
- set_range(request, range_start, range_end)
- # Establish connection
- try:
- ctx.data = self.ydl.urlopen(request)
- # When trying to resume, the Content-Range HTTP header of the response
- # has to be checked to match the value of the requested Range HTTP
- # header. This is because some webservers don't support resuming and
- # serve the whole file with no Content-Range set in the response
- # despite the requested Range (see
- # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
- if has_range:
- content_range = ctx.data.headers.get('Content-Range')
- if content_range:
- content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
- # Content-Range is present and matches requested Range, resume is possible
- if content_range_m:
- if range_start == int(content_range_m.group(1)):
- content_range_end = int_or_none(content_range_m.group(2))
- content_len = int_or_none(content_range_m.group(3))
- accept_content_len = (
- # Non-chunked download
- not ctx.chunk_size
- # Chunked download and requested piece or
- # its part is promised to be served
- or content_range_end == range_end
- or content_len < range_end)
- if accept_content_len:
- ctx.data_len = content_len
- return
- # Content-Range is either not present or invalid. Assuming the remote
- # webserver is trying to send the whole file, resume is not possible,
- # so we wipe the local file and redownload it entirely.
- self.report_unable_to_resume()
- ctx.resume_len = 0
- ctx.open_mode = 'wb'
- ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
- return
- except (compat_urllib_error.HTTPError, ) as err:
- if err.code == 416:
- # Unable to resume (requested range not satisfiable)
- try:
- # Open the connection again without the range header
- ctx.data = self.ydl.urlopen(
- sanitized_Request(url, None, headers))
- content_length = ctx.data.info()['Content-Length']
- except (compat_urllib_error.HTTPError, ) as err:
- if err.code < 500 or err.code >= 600:
- raise
- else:
- # Examine the reported length
- if (content_length is not None
- and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
- # The file had already been fully downloaded.
- # Explanation of the above condition: in issue #175 it was revealed
- # that YouTube sometimes adds or removes a few bytes from the end of
- # the file, changing the file size slightly and causing problems for
- # some users. So a suggested change was implemented: consider the file
- # completely downloaded if its reported size differs by less than 100
- # bytes from the one on the hard drive.
- self.report_file_already_downloaded(ctx.filename)
- self.try_rename(ctx.tmpfilename, ctx.filename)
- self._hook_progress({
- 'filename': ctx.filename,
- 'status': 'finished',
- 'downloaded_bytes': ctx.resume_len,
- 'total_bytes': ctx.resume_len,
- })
- raise SucceedDownload()
- else:
- # The length does not match, we start the download over
- self.report_unable_to_resume()
- ctx.resume_len = 0
- ctx.open_mode = 'wb'
- return
- elif err.code < 500 or err.code >= 600:
- # Unexpected HTTP error
- raise
- raise RetryDownload(err)
- except socket.error as err:
- # A connection reset is no problem, we just retry; anything
- # else is re-raised
- if err.errno != errno.ECONNRESET:
- raise
- raise RetryDownload(err)
-
- def download():
- data_len = ctx.data.info().get('Content-length', None)
-
- # The Range HTTP header may be ignored or unsupported by a webserver
- # (e.g. extractor/scivee.py, extractor/bambuser.py).
- # However, for a test we would still like to download just a piece of the file.
- # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
- # the block size when downloading a file.
- if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
- data_len = self._TEST_FILE_SIZE
-
- if data_len is not None:
- data_len = int(data_len) + ctx.resume_len
- min_data_len = self.params.get('min_filesize')
- max_data_len = self.params.get('max_filesize')
- if min_data_len is not None and data_len < min_data_len:
- self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
- return False
- if max_data_len is not None and data_len > max_data_len:
- self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
- return False
-
- byte_counter = 0 + ctx.resume_len
- block_size = ctx.block_size
- start = time.time()
-
- # Measure time over the whole while-loop, so slow_down() and best_block_size() work together properly
- now = None # needed for slow_down() in the first loop run
- before = start # start measuring
-
- def retry(e):
- to_stdout = ctx.tmpfilename == '-'
- if not to_stdout:
- ctx.stream.close()
- ctx.stream = None
- ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
- raise RetryDownload(e)
-
- while True:
- try:
- # Download and write
- data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
- # socket.timeout is a subclass of socket.error but may not have
- # errno set
- except socket.timeout as e:
- retry(e)
- except socket.error as e:
- if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
- raise
- retry(e)
-
- byte_counter += len(data_block)
-
- # exit loop when download is finished
- if len(data_block) == 0:
- break
-
- # Open destination file just in time
- if ctx.stream is None:
- try:
- ctx.stream, ctx.tmpfilename = sanitize_open(
- ctx.tmpfilename, ctx.open_mode)
- assert ctx.stream is not None
- ctx.filename = self.undo_temp_name(ctx.tmpfilename)
- self.report_destination(ctx.filename)
- except (OSError, IOError) as err:
- self.report_error('unable to open for writing: %s' % str(err))
- return False
-
- if self.params.get('xattr_set_filesize', False) and data_len is not None:
- try:
- write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
- except (XAttrUnavailableError, XAttrMetadataError) as err:
- self.report_error('unable to set filesize xattr: %s' % str(err))
-
- try:
- ctx.stream.write(data_block)
- except (IOError, OSError) as err:
- self.to_stderr('\n')
- self.report_error('unable to write data: %s' % str(err))
- return False
-
- # Apply rate limit
- self.slow_down(start, now, byte_counter - ctx.resume_len)
-
- # end measuring of one loop run
- now = time.time()
- after = now
-
- # Adjust block size
- if not self.params.get('noresizebuffer', False):
- block_size = self.best_block_size(after - before, len(data_block))
-
- before = after
-
- # Progress message
- speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
- if ctx.data_len is None:
- eta = None
- else:
- eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
-
- self._hook_progress({
- 'status': 'downloading',
- 'downloaded_bytes': byte_counter,
- 'total_bytes': ctx.data_len,
- 'tmpfilename': ctx.tmpfilename,
- 'filename': ctx.filename,
- 'eta': eta,
- 'speed': speed,
- 'elapsed': now - ctx.start_time,
- })
-
- if is_test and byte_counter == data_len:
- break
-
- if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
- ctx.resume_len = byte_counter
- # ctx.block_size = block_size
- raise NextFragment()
-
- if ctx.stream is None:
- self.to_stderr('\n')
- self.report_error('Did not get any data blocks')
- return False
- if ctx.tmpfilename != '-':
- ctx.stream.close()
-
- if data_len is not None and byte_counter != data_len:
- err = ContentTooShortError(byte_counter, int(data_len))
- if count <= retries:
- retry(err)
- raise err
-
- self.try_rename(ctx.tmpfilename, ctx.filename)
-
- # Update file modification time
- if self.params.get('updatetime', True):
- info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
-
- self._hook_progress({
- 'downloaded_bytes': byte_counter,
- 'total_bytes': byte_counter,
- 'filename': ctx.filename,
- 'status': 'finished',
- 'elapsed': time.time() - ctx.start_time,
- })
-
- return True
-
- while count <= retries:
- try:
- establish_connection()
- return download()
- except RetryDownload as e:
- count += 1
- if count <= retries:
- self.report_retry(e.source_error, count, retries)
- continue
- except NextFragment:
- continue
- except SucceedDownload:
- return True
-
- self.report_error('giving up after %s retries' % retries)
- return False
diff --git a/youtube_dl/extractor/abc.py b/youtube_dl/extractor/abc.py
deleted file mode 100644
index 4ac323bf6..000000000
--- a/youtube_dl/extractor/abc.py
+++ /dev/null
@@ -1,193 +0,0 @@
-from __future__ import unicode_literals
-
-import hashlib
-import hmac
-import re
-import time
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- js_to_json,
- int_or_none,
- parse_iso8601,
- try_get,
- unescapeHTML,
- update_url_query,
-)
-
-
-class ABCIE(InfoExtractor):
- IE_NAME = 'abc.net.au'
- _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
- 'md5': 'cb3dd03b18455a661071ee1e28344d9f',
- 'info_dict': {
- 'id': '5868334',
- 'ext': 'mp4',
- 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
- 'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
- },
- 'skip': 'this video has expired',
- }, {
- 'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
- 'md5': 'db2a5369238b51f9811ad815b69dc086',
- 'info_dict': {
- 'id': 'NvqvPeNZsHU',
- 'ext': 'mp4',
- 'upload_date': '20150816',
- 'uploader': 'ABC News (Australia)',
- 'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
- 'uploader_id': 'NewsOnABC',
- 'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
- },
- 'add_ie': ['Youtube'],
- 'skip': 'Not accessible from Travis CI server',
- }, {
- 'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
- 'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
- 'info_dict': {
- 'id': '6880080',
- 'ext': 'mp3',
- 'title': 'NAB lifts interest rates, following Westpac and CBA',
- 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
- },
- }, {
- 'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- mobj = re.search(
- r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
- webpage)
- if mobj is None:
- expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
- if expired:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
- raise ExtractorError('Unable to extract video URLs')
-
- urls_info = self._parse_json(
- mobj.group('json_data'), video_id, transform_source=js_to_json)
-
- if not isinstance(urls_info, list):
- urls_info = [urls_info]
-
- if mobj.group('type') == 'YouTube':
- return self.playlist_result([
- self.url_result(url_info['url']) for url_info in urls_info])
-
- formats = [{
- 'url': url_info['url'],
- 'vcodec': url_info.get('codec') if mobj.group('type') == 'Video' else 'none',
- 'width': int_or_none(url_info.get('width')),
- 'height': int_or_none(url_info.get('height')),
- 'tbr': int_or_none(url_info.get('bitrate')),
- 'filesize': int_or_none(url_info.get('filesize')),
- } for url_info in urls_info]
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'formats': formats,
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- }
-
-
-class ABCIViewIE(InfoExtractor):
- IE_NAME = 'abc.net.au:iview'
- _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
- _GEO_COUNTRIES = ['AU']
-
- # ABC iview programs are normally available for 14 days only.
- _TESTS = [{
- 'url': 'https://iview.abc.net.au/show/ben-and-hollys-little-kingdom/series/0/video/ZX9371A050S00',
- 'md5': 'cde42d728b3b7c2b32b1b94b4a548afc',
- 'info_dict': {
- 'id': 'ZX9371A050S00',
- 'ext': 'mp4',
- 'title': "Gaston's Birthday",
- 'series': "Ben And Holly's Little Kingdom",
- 'description': 'md5:f9de914d02f226968f598ac76f105bcf',
- 'upload_date': '20180604',
- 'uploader_id': 'abc4kids',
- 'timestamp': 1528140219,
- },
- 'params': {
- 'skip_download': True,
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- video_params = self._download_json(
- 'https://iview.abc.net.au/api/programs/' + video_id, video_id)
- title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
- stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
-
- house_number = video_params.get('episodeHouseNumber') or video_id
- path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
- int(time.time()), house_number)
- sig = hmac.new(
- b'android.content.res.Resources',
- path.encode('utf-8'), hashlib.sha256).hexdigest()
- token = self._download_webpage(
- 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
-
- def tokenize_url(url, token):
- return update_url_query(url, {
- 'hdnea': token,
- })
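- # (Editorial note, not part of the original file: for house number
- # 'ZX9371A050S00' at ts=1528140219 the signed path is
- # '/auth/hls/sign?ts=1528140219&hn=ZX9371A050S00&d=android-tablet' and
- # sig is its SHA-256 HMAC under the hard-coded key
- # b'android.content.res.Resources'; the token the endpoint returns is
- # then appended to each stream URL as 'hdnea' by tokenize_url above.)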
-
- for sd in ('sd', 'sd-low'):
- sd_url = try_get(
- stream, lambda x: x['streams']['hls'][sd], compat_str)
- if not sd_url:
- continue
- formats = self._extract_m3u8_formats(
- tokenize_url(sd_url, token), video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
- if formats:
- break
- self._sort_formats(formats)
-
- subtitles = {}
- src_vtt = stream.get('captions', {}).get('src-vtt')
- if src_vtt:
- subtitles['en'] = [{
- 'url': src_vtt,
- 'ext': 'vtt',
- }]
-
- is_live = video_params.get('livestream') == '1'
- if is_live:
- title = self._live_title(title)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video_params.get('description'),
- 'thumbnail': video_params.get('thumbnail'),
- 'duration': int_or_none(video_params.get('eventDuration')),
- 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
- 'series': unescapeHTML(video_params.get('seriesTitle')),
- 'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
- 'season_number': int_or_none(self._search_regex(
- r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
- 'episode_number': int_or_none(self._search_regex(
- r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
- 'episode_id': house_number,
- 'uploader_id': video_params.get('channel'),
- 'formats': formats,
- 'subtitles': subtitles,
- 'is_live': is_live,
- }
diff --git a/youtube_dl/extractor/abcotvs.py b/youtube_dl/extractor/abcotvs.py
deleted file mode 100644
index 03b92a39c..000000000
--- a/youtube_dl/extractor/abcotvs.py
+++ /dev/null
@@ -1,112 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_iso8601,
-)
-
-
-class ABCOTVSIE(InfoExtractor):
- IE_NAME = 'abcotvs'
- IE_DESC = 'ABC Owned Television Stations'
- _VALID_URL = r'https?://(?:abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:/[^/]+/(?P<display_id>[^/]+))?/(?P<id>\d+)'
- _TESTS = [
- {
- 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
- 'info_dict': {
- 'id': '472581',
- 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
- 'ext': 'mp4',
- 'title': 'East Bay museum celebrates vintage synthesizers',
- 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1421123075,
- 'upload_date': '20150113',
- 'uploader': 'Jonathan Bloom',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://abc7news.com/472581',
- 'only_matching': True,
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id') or video_id
-
- webpage = self._download_webpage(url, display_id)
-
- m3u8 = self._html_search_meta(
- 'contentURL', webpage, 'm3u8 url', fatal=True).split('?')[0]
-
- formats = self._extract_m3u8_formats(m3u8, display_id, 'mp4')
- self._sort_formats(formats)
-
- title = self._og_search_title(webpage).strip()
- description = self._og_search_description(webpage).strip()
- thumbnail = self._og_search_thumbnail(webpage)
- timestamp = parse_iso8601(self._search_regex(
- r'<div class="meta">\s*<time class="timeago" datetime="([^"]+)">',
- webpage, 'upload date', fatal=False))
- uploader = self._search_regex(
- r'rel="author">([^<]+)</a>',
- webpage, 'uploader', default=None)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'formats': formats,
- }
-
-
-class ABCOTVSClipsIE(InfoExtractor):
- IE_NAME = 'abcotvs:clips'
- _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
- _TEST = {
- 'url': 'https://clips.abcotvs.com/kabc/video/214814',
- 'info_dict': {
- 'id': '214814',
- 'ext': 'mp4',
- 'title': 'SpaceX launch pad explosion destroys rocket, satellite',
- 'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
- 'upload_date': '20160901',
- 'timestamp': 1472756695,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0]
- title = video_data['title']
- formats = self._extract_m3u8_formats(
- video_data['videoURL'].split('?')[0], video_id, 'mp4')
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video_data.get('description'),
- 'thumbnail': video_data.get('thumbnailURL'),
- 'duration': int_or_none(video_data.get('duration')),
- 'timestamp': int_or_none(video_data.get('pubDate')),
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/addanime.py b/youtube_dl/extractor/addanime.py
deleted file mode 100644
index 5e7c0724e..000000000
--- a/youtube_dl/extractor/addanime.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
-)
-from ..utils import (
- ExtractorError,
- qualities,
-)
-
-
-class AddAnimeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?add-anime\.net/(?:watch_video\.php\?(?:.*?)v=|video/)(?P<id>[\w_]+)'
- _TESTS = [{
- 'url': 'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
- 'md5': '72954ea10bc979ab5e2eb288b21425a0',
- 'info_dict': {
- 'id': '24MR3YO5SAS9',
- 'ext': 'mp4',
- 'description': 'One Piece 606',
- 'title': 'One Piece 606',
- },
- 'skip': 'Video is gone',
- }, {
- 'url': 'http://add-anime.net/video/MDUGWYKNGBD8/One-Piece-687',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- try:
- webpage = self._download_webpage(url, video_id)
- except ExtractorError as ee:
- if not isinstance(ee.cause, compat_HTTPError) or \
- ee.cause.code != 503:
- raise
-
- redir_webpage = ee.cause.read().decode('utf-8')
- action = self._search_regex(
- r'<form id="challenge-form" action="([^"]+)"',
- redir_webpage, 'Redirect form')
- vc = self._search_regex(
- r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
- redir_webpage, 'redirect vc value')
- av = re.search(
- r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
- redir_webpage)
- if av is None:
- raise ExtractorError('Cannot find redirect math task')
- av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))
-
- parsed_url = compat_urllib_parse_urlparse(url)
- av_val = av_res + len(parsed_url.netloc)
- confirm_url = (
- parsed_url.scheme + '://' + parsed_url.netloc
- + action + '?'
- + compat_urllib_parse_urlencode({
- 'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
- self._download_webpage(
- confirm_url, video_id,
- note='Confirming after redirect')
- webpage = self._download_webpage(url, video_id)
-
- FORMATS = ('normal', 'hq')
- quality = qualities(FORMATS)
- formats = []
- for format_id in FORMATS:
- rex = r"var %s_video_file = '(.*?)';" % re.escape(format_id)
- video_url = self._search_regex(rex, webpage, 'video file URL',
- fatal=False)
- if not video_url:
- continue
- formats.append({
- 'format_id': format_id,
- 'url': video_url,
- 'quality': quality(format_id),
- })
- self._sort_formats(formats)
- video_title = self._og_search_title(webpage)
- video_description = self._og_search_description(webpage)
-
- return {
- '_type': 'video',
- 'id': video_id,
- 'formats': formats,
- 'title': video_title,
- 'description': video_description
- }
diff --git a/youtube_dl/extractor/adobetv.py b/youtube_dl/extractor/adobetv.py
deleted file mode 100644
index 008c98e51..000000000
--- a/youtube_dl/extractor/adobetv.py
+++ /dev/null
@@ -1,197 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- parse_duration,
- unified_strdate,
- str_to_int,
- int_or_none,
- float_or_none,
- ISO639Utils,
- determine_ext,
-)
-
-
-class AdobeTVBaseIE(InfoExtractor):
- _API_BASE_URL = 'http://tv.adobe.com/api/v4/'
-
-
-class AdobeTVIE(AdobeTVBaseIE):
- _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
-
- _TEST = {
- 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
- 'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
- 'info_dict': {
- 'id': '10981',
- 'ext': 'mp4',
- 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
- 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
- 'thumbnail': r're:https?://.*\.jpg$',
- 'upload_date': '20110914',
- 'duration': 60,
- 'view_count': int,
- },
- }
-
- def _real_extract(self, url):
- language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
- if not language:
- language = 'en'
-
- video_data = self._download_json(
- self._API_BASE_URL + 'episode/get/?language=%s&show_urlname=%s&urlname=%s&disclosure=standard' % (language, show_urlname, urlname),
- urlname)['data'][0]
-
- formats = [{
- 'url': source['url'],
- 'format_id': source.get('quality_level') or source['url'].split('-')[-1].split('.')[0] or None,
- 'width': int_or_none(source.get('width')),
- 'height': int_or_none(source.get('height')),
- 'tbr': int_or_none(source.get('video_data_rate')),
- } for source in video_data['videos']]
- self._sort_formats(formats)
-
- return {
- 'id': compat_str(video_data['id']),
- 'title': video_data['title'],
- 'description': video_data.get('description'),
- 'thumbnail': video_data.get('thumbnail'),
- 'upload_date': unified_strdate(video_data.get('start_date')),
- 'duration': parse_duration(video_data.get('duration')),
- 'view_count': str_to_int(video_data.get('playcount')),
- 'formats': formats,
- }
-
-
-class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
- def _parse_page_data(self, page_data):
- return [self.url_result(self._get_element_url(element_data)) for element_data in page_data]
-
- def _extract_playlist_entries(self, url, display_id):
- page = self._download_json(url, display_id)
- entries = self._parse_page_data(page['data'])
- for page_num in range(2, page['paging']['pages'] + 1):
- entries.extend(self._parse_page_data(
- self._download_json(url + '&page=%d' % page_num, display_id)['data']))
- return entries
-
-
-class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
- _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
-
- _TEST = {
- 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
- 'info_dict': {
- 'id': '36',
- 'title': 'The Complete Picture with Julieanne Kost',
- 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
- },
- 'playlist_mincount': 136,
- }
-
- def _get_element_url(self, element_data):
- return element_data['urls'][0]
-
- def _real_extract(self, url):
- language, show_urlname = re.match(self._VALID_URL, url).groups()
- if not language:
- language = 'en'
- query = 'language=%s&show_urlname=%s' % (language, show_urlname)
-
- show_data = self._download_json(self._API_BASE_URL + 'show/get/?%s' % query, show_urlname)['data'][0]
-
- return self.playlist_result(
- self._extract_playlist_entries(self._API_BASE_URL + 'episode/?%s' % query, show_urlname),
- compat_str(show_data['id']),
- show_data['show_name'],
- show_data['show_description'])
-
-
-class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
- _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
-
- _TEST = {
- 'url': 'http://tv.adobe.com/channel/development',
- 'info_dict': {
- 'id': 'development',
- },
- 'playlist_mincount': 96,
- }
-
- def _get_element_url(self, element_data):
- return element_data['url']
-
- def _real_extract(self, url):
- language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
- if not language:
- language = 'en'
- query = 'language=%s&channel_urlname=%s' % (language, channel_urlname)
- if category_urlname:
- query += '&category_urlname=%s' % category_urlname
-
- return self.playlist_result(
- self._extract_playlist_entries(self._API_BASE_URL + 'show/?%s' % query, channel_urlname),
- channel_urlname)
-
-
-class AdobeTVVideoIE(InfoExtractor):
- _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
-
- _TEST = {
- # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
- 'url': 'https://video.tv.adobe.com/v/2456/',
- 'md5': '43662b577c018ad707a63766462b1e87',
- 'info_dict': {
- 'id': '2456',
- 'ext': 'mp4',
- 'title': 'New experience with Acrobat DC',
- 'description': 'New experience with Acrobat DC',
- 'duration': 248.667,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_data = self._parse_json(self._search_regex(
- r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
-
- formats = [{
- 'format_id': '%s-%s' % (determine_ext(source['src']), source.get('height')),
- 'url': source['src'],
- 'width': int_or_none(source.get('width')),
- 'height': int_or_none(source.get('height')),
- 'tbr': int_or_none(source.get('bitrate')),
- } for source in video_data['sources']]
- self._sort_formats(formats)
-
- # For both metadata and downloaded files the duration varies among
- # formats, so we just pick the maximum one
- duration = max(filter(None, [
- float_or_none(source.get('duration'), scale=1000)
- for source in video_data['sources']]))
-
- subtitles = {}
- for translation in video_data.get('translations', []):
- lang_id = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
- if lang_id not in subtitles:
- subtitles[lang_id] = []
- subtitles[lang_id].append({
- 'url': translation['vttPath'],
- 'ext': 'vtt',
- })
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': video_data['title'],
- 'description': video_data.get('description'),
- 'thumbnail': video_data['video'].get('poster'),
- 'duration': duration,
- 'subtitles': subtitles,
- }
diff --git a/youtube_dl/extractor/americastestkitchen.py b/youtube_dl/extractor/americastestkitchen.py
deleted file mode 100644
index 8b32aa886..000000000
--- a/youtube_dl/extractor/americastestkitchen.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- int_or_none,
- try_get,
- unified_strdate,
-)
-
-
-class AmericasTestKitchenIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://www.americastestkitchen.com/episode/548-summer-dinner-party',
- 'md5': 'b861c3e365ac38ad319cfd509c30577f',
- 'info_dict': {
- 'id': '1_5g5zua6e',
- 'title': 'Summer Dinner Party',
- 'ext': 'mp4',
- 'description': 'md5:858d986e73a4826979b6a5d9f8f6a1ec',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1497285541,
- 'upload_date': '20170612',
- 'uploader_id': 'roger.metcalf@americastestkitchen.com',
- 'release_date': '20170617',
- 'series': "America's Test Kitchen",
- 'season_number': 17,
- 'episode': 'Summer Dinner Party',
- 'episode_number': 24,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_data = self._parse_json(
- self._search_regex(
- r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
- webpage, 'initial context'),
- video_id)
-
- ep_data = try_get(
- video_data,
- (lambda x: x['episodeDetail']['content']['data'],
- lambda x: x['videoDetail']['content']['data']), dict)
- ep_meta = ep_data.get('full_video', {})
-
- zype_id = ep_meta.get('zype_id')
- if zype_id:
- embed_url = 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id
- ie_key = 'Zype'
- else:
- partner_id = self._search_regex(
- r'src=["\'](?:https?:)?//(?:[^/]+\.)kaltura\.com/(?:[^/]+/)*(?:p|partner_id)/(\d+)',
- webpage, 'kaltura partner id')
- external_id = ep_data.get('external_id') or ep_meta['external_id']
- embed_url = 'kaltura:%s:%s' % (partner_id, external_id)
- ie_key = 'Kaltura'
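- # At this point embed_url is either a Zype player URL or a
- # 'kaltura:<partner_id>:<external_id>' pseudo-URL (e.g.
- # 'kaltura:12345:67890', with made-up ids); the url_transparent
- # result below hands it to the matching extractor via ie_key.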
-
- title = ep_data.get('title') or ep_meta.get('title')
- description = clean_html(ep_meta.get('episode_description') or ep_data.get(
- 'description') or ep_meta.get('description'))
- thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
- release_date = unified_strdate(ep_data.get('aired_at'))
-
- season_number = int_or_none(ep_meta.get('season_number'))
- episode = ep_meta.get('title')
- episode_number = int_or_none(ep_meta.get('episode_number'))
-
- return {
- '_type': 'url_transparent',
- 'url': embed_url,
- 'ie_key': ie_key,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'release_date': release_date,
- 'series': "America's Test Kitchen",
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- }
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py
deleted file mode 100644
index a9ef733e0..000000000
--- a/youtube_dl/extractor/appletrailers.py
+++ /dev/null
@@ -1,283 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import json
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- int_or_none,
- parse_duration,
- unified_strdate,
-)
-
-
-class AppleTrailersIE(InfoExtractor):
- IE_NAME = 'appletrailers'
- _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
- _TESTS = [{
- 'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
- 'info_dict': {
- 'id': '5111',
- 'title': 'Man of Steel',
- },
- 'playlist': [
- {
- 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
- 'info_dict': {
- 'id': 'manofsteel-trailer4',
- 'ext': 'mov',
- 'duration': 111,
- 'title': 'Trailer 4',
- 'upload_date': '20130523',
- 'uploader_id': 'wb',
- },
- },
- {
- 'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
- 'info_dict': {
- 'id': 'manofsteel-trailer3',
- 'ext': 'mov',
- 'duration': 182,
- 'title': 'Trailer 3',
- 'upload_date': '20130417',
- 'uploader_id': 'wb',
- },
- },
- {
- 'md5': 'd0f1e1150989b9924679b441f3404d48',
- 'info_dict': {
- 'id': 'manofsteel-trailer',
- 'ext': 'mov',
- 'duration': 148,
- 'title': 'Trailer',
- 'upload_date': '20121212',
- 'uploader_id': 'wb',
- },
- },
- {
- 'md5': '5fe08795b943eb2e757fa95cb6def1cb',
- 'info_dict': {
- 'id': 'manofsteel-teaser',
- 'ext': 'mov',
- 'duration': 93,
- 'title': 'Teaser',
- 'upload_date': '20120721',
- 'uploader_id': 'wb',
- },
- },
- ]
- }, {
- 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
- 'info_dict': {
- 'id': '4489',
- 'title': 'Blackthorn',
- },
- 'playlist_mincount': 2,
- 'expected_warnings': ['Unable to download JSON metadata'],
- }, {
- # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
- 'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
- 'info_dict': {
- 'id': '15881',
- 'title': 'Kung Fu Panda 3',
- },
- 'playlist_mincount': 4,
- }, {
- 'url': 'http://trailers.apple.com/ca/metropole/autrui/',
- 'only_matching': True,
- }, {
- 'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
- 'only_matching': True,
- }]
-
- _JSON_RE = r'iTunes.playURL\((.*?)\);'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- movie = mobj.group('movie')
- uploader_id = mobj.group('company')
-
- webpage = self._download_webpage(url, movie)
- film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
- film_data = self._download_json(
- 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
- film_id, fatal=False)
-
- if film_data:
- entries = []
- for clip in film_data.get('clips', []):
- clip_title = clip['title']
-
- formats = []
- for version, version_data in clip.get('versions', {}).items():
- for size, size_data in version_data.get('sizes', {}).items():
- src = size_data.get('src')
- if not src:
- continue
- formats.append({
- 'format_id': '%s-%s' % (version, size),
- 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
- 'width': int_or_none(size_data.get('width')),
- 'height': int_or_none(size_data.get('height')),
- 'language': version[:2],
- })
- self._sort_formats(formats)
-
- entries.append({
- 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
- 'formats': formats,
- 'title': clip_title,
- 'thumbnail': clip.get('screen') or clip.get('thumb'),
- 'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
- 'upload_date': unified_strdate(clip.get('posted')),
- 'uploader_id': uploader_id,
- })
-
- page_data = film_data.get('page', {})
- return self.playlist_result(entries, film_id, page_data.get('movie_title'))
-
- playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
-
- def fix_html(s):
- s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
- s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
- # The ' characters in the onClick attributes are not escaped, so pages
- # like http://trailers.apple.com/trailers/wb/gravity/ couldn't be parsed
- # without the fix below.
-
- def _clean_json(m):
- return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
- s = re.sub(self._JSON_RE, _clean_json, s)
- s = '<html>%s</html>' % s
- return s
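- # Worked example of the quote fix above, with a made-up payload:
- # iTunes.playURL({"title":"O'Brien"});
- # becomes
- # iTunes.playURL({"title":"O&#39;Brien"});
- # so the resulting document parses as well-formed XML.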
- doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
-
- playlist = []
- for li in doc.findall('./div/ul/li'):
- on_click = li.find('.//a').attrib['onClick']
- trailer_info_json = self._search_regex(self._JSON_RE,
- on_click, 'trailer info')
- trailer_info = json.loads(trailer_info_json)
- first_url = trailer_info.get('url')
- if not first_url:
- continue
- title = trailer_info['title']
- video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
- thumbnail = li.find('.//img').attrib['src']
- upload_date = trailer_info['posted'].replace('-', '')
-
- runtime = trailer_info['runtime']
- m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
- duration = None
- if m:
- duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
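- # e.g. a runtime of '2:31' yields 60 * 2 + 31 = 151 seconds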
-
- trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
- settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
- settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
-
- formats = []
- for format in settings['metadata']['sizes']:
- # The src value points to the real video file
- format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
- formats.append({
- 'url': format_url,
- 'format': format['type'],
- 'width': int_or_none(format['width']),
- 'height': int_or_none(format['height']),
- })
-
- self._sort_formats(formats)
-
- playlist.append({
- '_type': 'video',
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'duration': duration,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'uploader_id': uploader_id,
- 'http_headers': {
- 'User-Agent': 'QuickTime compatible (youtube-dl)',
- },
- })
-
- return {
- '_type': 'playlist',
- 'id': movie,
- 'entries': playlist,
- }
-
-
-class AppleTrailersSectionIE(InfoExtractor):
- IE_NAME = 'appletrailers:section'
- _SECTIONS = {
- 'justadded': {
- 'feed_path': 'just_added',
- 'title': 'Just Added',
- },
- 'exclusive': {
- 'feed_path': 'exclusive',
- 'title': 'Exclusive',
- },
- 'justhd': {
- 'feed_path': 'just_hd',
- 'title': 'Just HD',
- },
- 'mostpopular': {
- 'feed_path': 'most_pop',
- 'title': 'Most Popular',
- },
- 'moviestudios': {
- 'feed_path': 'studios',
- 'title': 'Movie Studios',
- },
- }
- _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
- _TESTS = [{
- 'url': 'http://trailers.apple.com/#section=justadded',
- 'info_dict': {
- 'title': 'Just Added',
- 'id': 'justadded',
- },
- 'playlist_mincount': 80,
- }, {
- 'url': 'http://trailers.apple.com/#section=exclusive',
- 'info_dict': {
- 'title': 'Exclusive',
- 'id': 'exclusive',
- },
- 'playlist_mincount': 80,
- }, {
- 'url': 'http://trailers.apple.com/#section=justhd',
- 'info_dict': {
- 'title': 'Just HD',
- 'id': 'justhd',
- },
- 'playlist_mincount': 80,
- }, {
- 'url': 'http://trailers.apple.com/#section=mostpopular',
- 'info_dict': {
- 'title': 'Most Popular',
- 'id': 'mostpopular',
- },
- 'playlist_mincount': 30,
- }, {
- 'url': 'http://trailers.apple.com/#section=moviestudios',
- 'info_dict': {
- 'title': 'Movie Studios',
- 'id': 'moviestudios',
- },
- 'playlist_mincount': 80,
- }]
-
- def _real_extract(self, url):
- section = self._match_id(url)
- section_data = self._download_json(
- 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
- section)
- entries = [
- self.url_result('http://trailers.apple.com' + e['location'])
- for e in section_data]
- return self.playlist_result(entries, section, self._SECTIONS[section]['title'])
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
deleted file mode 100644
index 8adae4644..000000000
--- a/youtube_dl/extractor/ard.py
+++ /dev/null
@@ -1,400 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .generic import GenericIE
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- parse_duration,
- qualities,
- str_or_none,
- try_get,
- unified_strdate,
- unified_timestamp,
- update_url_query,
- url_or_none,
- xpath_text,
-)
-from ..compat import compat_etree_fromstring
-
-
-class ARDMediathekIE(InfoExtractor):
- IE_NAME = 'ARD:mediathek'
- _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
-
- _TESTS = [{
- # available till 26.07.2022
- 'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
- 'info_dict': {
- 'id': '44726822',
- 'ext': 'mp4',
- 'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
- 'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
- 'duration': 1740,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }, {
- 'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
- 'only_matching': True,
- }, {
- # audio
- 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
- 'only_matching': True,
- }, {
- 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
- 'only_matching': True,
- }, {
- # audio
- 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
- 'only_matching': True,
- }, {
- 'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
-
- def _extract_media_info(self, media_info_url, webpage, video_id):
- media_info = self._download_json(
- media_info_url, video_id, 'Downloading media JSON')
-
- formats = self._extract_formats(media_info, video_id)
-
- if not formats:
- if '"fsk"' in webpage:
- raise ExtractorError(
- 'This video is only available after 20:00', expected=True)
- elif media_info.get('_geoblocked'):
- raise ExtractorError('This video is not available due to geo restriction', expected=True)
-
- self._sort_formats(formats)
-
- duration = int_or_none(media_info.get('_duration'))
- thumbnail = media_info.get('_previewImage')
- is_live = media_info.get('_isLive') is True
-
- subtitles = {}
- subtitle_url = media_info.get('_subtitleUrl')
- if subtitle_url:
- subtitles['de'] = [{
- 'ext': 'ttml',
- 'url': subtitle_url,
- }]
-
- return {
- 'id': video_id,
- 'duration': duration,
- 'thumbnail': thumbnail,
- 'is_live': is_live,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- def _extract_formats(self, media_info, video_id):
- type_ = media_info.get('_type')
- media_array = media_info.get('_mediaArray', [])
- formats = []
- for num, media in enumerate(media_array):
- for stream in media.get('_mediaStreamArray', []):
- stream_urls = stream.get('_stream')
- if not stream_urls:
- continue
- if not isinstance(stream_urls, list):
- stream_urls = [stream_urls]
- quality = stream.get('_quality')
- server = stream.get('_server')
- for stream_url in stream_urls:
- if not url_or_none(stream_url):
- continue
- ext = determine_ext(stream_url)
- if quality != 'auto' and ext in ('f4m', 'm3u8'):
- continue
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- update_url_query(stream_url, {
- 'hdcore': '3.1.1',
- 'plugin': 'aasp-3.1.1.69.124'
- }),
- video_id, f4m_id='hds', fatal=False))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- stream_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
- else:
- if server and server.startswith('rtmp'):
- f = {
- 'url': server,
- 'play_path': stream_url,
- 'format_id': 'a%s-rtmp-%s' % (num, quality),
- }
- else:
- f = {
- 'url': stream_url,
- 'format_id': 'a%s-%s-%s' % (num, ext, quality)
- }
- m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', stream_url)
- if m:
- f.update({
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
- if type_ == 'audio':
- f['vcodec'] = 'none'
- formats.append(f)
- return formats
-
- def _real_extract(self, url):
- # determine video id from url
- m = re.match(self._VALID_URL, url)
-
- document_id = None
-
- numid = re.search(r'documentId=([0-9]+)', url)
- if numid:
- document_id = video_id = numid.group(1)
- else:
- video_id = m.group('video_id')
-
- webpage = self._download_webpage(url, video_id)
-
- ERRORS = (
- ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
- ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
- 'Video %s is no longer available'),
- )
-
- for pattern, message in ERRORS:
- if pattern in webpage:
- raise ExtractorError(message % video_id, expected=True)
-
- if re.search(r'[\?&]rss($|[=&])', url):
- doc = compat_etree_fromstring(webpage.encode('utf-8'))
- if doc.tag == 'rss':
- return GenericIE()._extract_rss(url, video_id, doc)
-
- title = self._html_search_regex(
- [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
- r'<meta name="dcterms\.title" content="(.*?)"/>',
- r'<h4 class="headline">(.*?)</h4>',
- r'<title[^>]*>(.*?)</title>'],
- webpage, 'title')
- description = self._html_search_meta(
- 'dcterms.abstract', webpage, 'description', default=None)
- if description is None:
- description = self._html_search_meta(
- 'description', webpage, 'meta description', default=None)
- if description is None:
- description = self._html_search_regex(
- r'<p\s+class="teasertext">(.+?)</p>',
- webpage, 'teaser text', default=None)
-
- # Thumbnail is sometimes not present.
- # It is in the mobile version, but that seems to use a different URL
- # structure altogether.
- thumbnail = self._og_search_thumbnail(webpage, default=None)
-
- media_streams = re.findall(r'''(?x)
- mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
- "([^"]+)"''', webpage)
-
- if media_streams:
- QUALITIES = qualities(['lo', 'hi', 'hq'])
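- # qualities() returns a ranking callable: QUALITIES('lo') == 0,
- # QUALITIES('hi') == 1, QUALITIES('hq') == 2, and any unknown
- # format id ranks lowest (-1).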
- formats = []
- for furl in set(media_streams):
- if furl.endswith('.f4m'):
- fid = 'f4m'
- else:
- fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
- fid = fid_m.group(1) if fid_m else None
- formats.append({
- 'quality': QUALITIES(fid),
- 'format_id': fid,
- 'url': furl,
- })
- self._sort_formats(formats)
- info = {
- 'formats': formats,
- }
- else: # request JSON file
- if not document_id:
- video_id = self._search_regex(
- r'/play/(?:config|media)/(\d+)', webpage, 'media id')
- info = self._extract_media_info(
- 'http://www.ardmediathek.de/play/media/%s' % video_id,
- webpage, video_id)
-
- info.update({
- 'id': video_id,
- 'title': self._live_title(title) if info.get('is_live') else title,
- 'description': description,
- 'thumbnail': thumbnail,
- })
-
- return info
-
-
-class ARDIE(InfoExtractor):
- _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
- _TESTS = [{
- # available till 14.02.2019
- 'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
- 'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
- 'info_dict': {
- 'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
- 'id': '102',
- 'ext': 'mp4',
- 'duration': 4435.0,
- 'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
- 'upload_date': '20180214',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- }, {
- 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('display_id')
-
- player_url = mobj.group('mainurl') + '~playerXml.xml'
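- # e.g. a page at .../videos/<display_id>-102.html resolves to the
- # player XML at .../videos/<display_id>-102~playerXml.xml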
- doc = self._download_xml(player_url, display_id)
- video_node = doc.find('./video')
- upload_date = unified_strdate(xpath_text(
- video_node, './broadcastDate'))
- thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
-
- formats = []
- for a in video_node.findall('.//asset'):
- f = {
- 'format_id': a.attrib['type'],
- 'width': int_or_none(a.find('./frameWidth').text),
- 'height': int_or_none(a.find('./frameHeight').text),
- 'vbr': int_or_none(a.find('./bitrateVideo').text),
- 'abr': int_or_none(a.find('./bitrateAudio').text),
- 'vcodec': a.find('./codecVideo').text,
- 'tbr': int_or_none(a.find('./totalBitrate').text),
- }
- if a.find('./serverPrefix').text:
- f['url'] = a.find('./serverPrefix').text
- f['playpath'] = a.find('./fileName').text
- else:
- f['url'] = a.find('./fileName').text
- formats.append(f)
- self._sort_formats(formats)
-
- return {
- 'id': mobj.group('id'),
- 'formats': formats,
- 'display_id': display_id,
- 'title': video_node.find('./title').text,
- 'duration': parse_duration(video_node.find('./duration').text),
- 'upload_date': upload_date,
- 'thumbnail': thumbnail,
- }
-
-
-class ARDBetaMediathekIE(InfoExtractor):
- _VALID_URL = r'https://(?:beta|www)\.ardmediathek\.de/[^/]+/(?:player|live)/(?P<video_id>[a-zA-Z0-9]+)(?:/(?P<display_id>[^/?#]+))?'
- _TESTS = [{
- 'url': 'https://beta.ardmediathek.de/ard/player/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE/die-robuste-roswita',
- 'md5': '2d02d996156ea3c397cfc5036b5d7f8f',
- 'info_dict': {
- 'display_id': 'die-robuste-roswita',
- 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
- 'title': 'Tatort: Die robuste Roswita',
- 'description': r're:^Der Mord.*trüber ist als die Ilm.',
- 'duration': 5316,
- 'thumbnail': 'https://img.ardmediathek.de/standard/00/55/43/59/34/-1774185891/16x9/960?mandant=ard',
- 'upload_date': '20180826',
- 'ext': 'mp4',
- },
- }, {
- 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('video_id')
- display_id = mobj.group('display_id') or video_id
-
- webpage = self._download_webpage(url, display_id)
- data_json = self._search_regex(r'window\.__APOLLO_STATE__\s*=\s*(\{.*);\n', webpage, 'json')
- data = self._parse_json(data_json, display_id)
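- # __APOLLO_STATE__ is a flat, id-keyed cache; the loop below simply
- # scans every cached object for the fields it recognises
- # (clipTitle, _duration, _stream, ...) and merges them into res.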
-
- res = {
- 'id': video_id,
- 'display_id': display_id,
- }
- formats = []
- subtitles = {}
- geoblocked = False
- for widget in data.values():
- if widget.get('_geoblocked') is True:
- geoblocked = True
- if '_duration' in widget:
- res['duration'] = int_or_none(widget['_duration'])
- if 'clipTitle' in widget:
- res['title'] = widget['clipTitle']
- if '_previewImage' in widget:
- res['thumbnail'] = widget['_previewImage']
- if 'broadcastedOn' in widget:
- res['timestamp'] = unified_timestamp(widget['broadcastedOn'])
- if 'synopsis' in widget:
- res['description'] = widget['synopsis']
- subtitle_url = url_or_none(widget.get('_subtitleUrl'))
- if subtitle_url:
- subtitles.setdefault('de', []).append({
- 'ext': 'ttml',
- 'url': subtitle_url,
- })
- if '_quality' in widget:
- format_url = url_or_none(try_get(
- widget, lambda x: x['_stream']['json'][0]))
- if not format_url:
- continue
- ext = determine_ext(format_url)
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- format_url + '?hdcore=3.11.0',
- video_id, f4m_id='hds', fatal=False))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id='hls',
- fatal=False))
- else:
- # HTTP formats are not available when geoblocked is True;
- # other formats are fine, though.
- if geoblocked:
- continue
- quality = str_or_none(widget.get('_quality'))
- formats.append({
- 'format_id': ('http-' + quality) if quality else 'http',
- 'url': format_url,
- 'preference': 10, # Plain HTTP, that's nice
- })
-
- if not formats and geoblocked:
- self.raise_geo_restricted(
- msg='This video is not available due to geoblocking',
- countries=['DE'])
-
- self._sort_formats(formats)
- res.update({
- 'subtitles': subtitles,
- 'formats': formats,
- })
-
- return res
diff --git a/youtube_dl/extractor/atresplayer.py b/youtube_dl/extractor/atresplayer.py
deleted file mode 100644
index ae1c09427..000000000
--- a/youtube_dl/extractor/atresplayer.py
+++ /dev/null
@@ -1,202 +0,0 @@
-from __future__ import unicode_literals
-
-import time
-import hmac
-import hashlib
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- sanitized_Request,
- urlencode_postdata,
- xpath_text,
-)
-
-
-class AtresPlayerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/television/[^/]+/[^/]+/[^/]+/(?P<id>.+?)_\d+\.html'
- _NETRC_MACHINE = 'atresplayer'
- _TESTS = [
- {
- 'url': 'http://www.atresplayer.com/television/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_2014122100174.html',
- 'md5': 'efd56753cda1bb64df52a3074f62e38a',
- 'info_dict': {
- 'id': 'capitulo-10-especial-solidario-nochebuena',
- 'ext': 'mp4',
- 'title': 'Especial Solidario de Nochebuena',
- 'description': 'md5:e2d52ff12214fa937107d21064075bf1',
- 'duration': 5527.6,
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- 'skip': 'This video is only available for registered users'
- },
- {
- 'url': 'http://www.atresplayer.com/television/especial/videoencuentros/temporada-1/capitulo-112-david-bustamante_2014121600375.html',
- 'md5': '6e52cbb513c405e403dbacb7aacf8747',
- 'info_dict': {
- 'id': 'capitulo-112-david-bustamante',
- 'ext': 'flv',
- 'title': 'David Bustamante',
- 'description': 'md5:f33f1c0a05be57f6708d4dd83a3b81c6',
- 'duration': 1439.0,
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- },
- {
- 'url': 'http://www.atresplayer.com/television/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_2014122400174.html',
- 'only_matching': True,
- },
- ]
-
- _USER_AGENT = 'Dalvik/1.6.0 (Linux; U; Android 4.3; GT-I9300 Build/JSS15J'
- _MAGIC = 'QWtMLXs414Yo+c#_+Q#K@NN)'
- _TIMESTAMP_SHIFT = 30000
-
- _TIME_API_URL = 'http://servicios.atresplayer.com/api/admin/time.json'
- _URL_VIDEO_TEMPLATE = 'https://servicios.atresplayer.com/api/urlVideo/{1}/{0}/{1}|{2}|{3}.json'
- _PLAYER_URL_TEMPLATE = 'https://servicios.atresplayer.com/episode/getplayer.json?episodePk=%s'
- _EPISODE_URL_TEMPLATE = 'http://www.atresplayer.com/episodexml/%s'
-
- _LOGIN_URL = 'https://servicios.atresplayer.com/j_spring_security_check'
-
- _ERRORS = {
- 'UNPUBLISHED': 'We\'re sorry, but this video is not yet available.',
- 'DELETED': 'This video has expired and is no longer available for online streaming.',
- 'GEOUNPUBLISHED': 'We\'re sorry, but this video is not available in your region due to right restrictions.',
- # 'PREMIUM': 'PREMIUM',
- }
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_form = {
- 'j_username': username,
- 'j_password': password,
- }
-
- request = sanitized_Request(
- self._LOGIN_URL, urlencode_postdata(login_form))
- request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- response = self._download_webpage(
- request, None, 'Logging in')
-
- error = self._html_search_regex(
- r'(?s)<ul[^>]+class="[^"]*\blist_error\b[^"]*">(.+?)</ul>',
- response, 'error', default=None)
- if error:
- raise ExtractorError(
- 'Unable to login: %s' % error, expected=True)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- episode_id = self._search_regex(
- r'episode="([^"]+)"', webpage, 'episode id')
-
- request = sanitized_Request(
- self._PLAYER_URL_TEMPLATE % episode_id,
- headers={'User-Agent': self._USER_AGENT})
- player = self._download_json(request, episode_id, 'Downloading player JSON')
-
- episode_type = player.get('typeOfEpisode')
- error_message = self._ERRORS.get(episode_type)
- if error_message:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
-
- formats = []
- video_url = player.get('urlVideo')
- if video_url:
- format_info = {
- 'url': video_url,
- 'format_id': 'http',
- }
- mobj = re.search(r'(?P<bitrate>\d+)K_(?P<width>\d+)x(?P<height>\d+)', video_url)
- if mobj:
- format_info.update({
- 'width': int_or_none(mobj.group('width')),
- 'height': int_or_none(mobj.group('height')),
- 'tbr': int_or_none(mobj.group('bitrate')),
- })
- formats.append(format_info)
-
- timestamp = int_or_none(self._download_webpage(
- self._TIME_API_URL,
- video_id, 'Downloading timestamp', fatal=False), 1000, time.time())
- timestamp_shifted = compat_str(timestamp + self._TIMESTAMP_SHIFT)
- token = hmac.new(
- self._MAGIC.encode('ascii'),
- (episode_id + timestamp_shifted).encode('utf-8'), hashlib.md5
- ).hexdigest()
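- # Token sketch with made-up values: for episode_id 'abc123' and
- # timestamp_shifted '1419120030', the token is the hex digest of
- # HMAC-MD5(_MAGIC, b'abc1231419120030').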
-
- request = sanitized_Request(
- self._URL_VIDEO_TEMPLATE.format('windows', episode_id, timestamp_shifted, token),
- headers={'User-Agent': self._USER_AGENT})
-
- fmt_json = self._download_json(
- request, video_id, 'Downloading windows video JSON')
-
- result = fmt_json.get('resultDes')
- if not result or result.lower() != 'ok':
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, result), expected=True)
-
- for format_id, video_url in fmt_json['resultObject'].items():
- if format_id == 'token' or not video_url.startswith('http'):
- continue
- if 'geodeswowsmpra3player' in video_url:
- # f4m_path = video_url.split('smil:', 1)[-1].split('free_', 1)[0]
- # f4m_url = 'http://drg.antena3.com/{0}hds/es/sd.f4m'.format(f4m_path)
- # these videos are protected by DRM; the f4m downloader doesn't support them
- continue
- video_url_hd = video_url.replace('free_es', 'es')
- formats.extend(self._extract_f4m_formats(
- video_url_hd[:-9] + '/manifest.f4m', video_id, f4m_id='hds',
- fatal=False))
- formats.extend(self._extract_mpd_formats(
- video_url_hd[:-9] + '/manifest.mpd', video_id, mpd_id='dash',
- fatal=False))
- self._sort_formats(formats)
-
- path_data = player.get('pathData')
-
- episode = self._download_xml(
- self._EPISODE_URL_TEMPLATE % path_data, video_id,
- 'Downloading episode XML')
-
- duration = float_or_none(xpath_text(
- episode, './media/asset/info/technical/contentDuration', 'duration'))
-
- art = episode.find('./media/asset/info/art')
- title = xpath_text(art, './name', 'title')
- description = xpath_text(art, './description', 'description')
- thumbnail = xpath_text(episode, './media/asset/files/background', 'thumbnail')
-
- subtitles = {}
- subtitle_url = xpath_text(episode, './media/asset/files/subtitle', 'subtitle')
- if subtitle_url:
- subtitles['es'] = [{
- 'ext': 'srt',
- 'url': subtitle_url,
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- }
diff --git a/youtube_dl/extractor/audioboom.py b/youtube_dl/extractor/audioboom.py
deleted file mode 100644
index 393f381c6..000000000
--- a/youtube_dl/extractor/audioboom.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import float_or_none
-
-
-class AudioBoomIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'https://audioboom.com/boos/4279833-3-09-2016-czaban-hour-3?t=0',
- 'md5': '63a8d73a055c6ed0f1e51921a10a5a76',
- 'info_dict': {
- 'id': '4279833',
- 'ext': 'mp3',
- 'title': '3/09/2016 Czaban Hour 3',
- 'description': 'Guest: Nate Davis - NFL free agency, Guest: Stan Gans',
- 'duration': 2245.72,
- 'uploader': 'SB Nation A.M.',
- 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/steveczabanyahoosportsradio',
- }
- }, {
- 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- clip = None
-
- clip_store = self._parse_json(
- self._search_regex(
- r'data-new-clip-store=(["\'])(?P<json>{.*?"clipId"\s*:\s*%s.*?})\1' % video_id,
- webpage, 'clip store', default='{}', group='json'),
- video_id, fatal=False)
- if clip_store:
- clips = clip_store.get('clips')
- if clips and isinstance(clips, list) and isinstance(clips[0], dict):
- clip = clips[0]
-
- def from_clip(field):
- if clip:
- return clip.get(field)
-
- audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
- 'audio', webpage, 'audio url')
- title = from_clip('title') or self._og_search_title(webpage)
- description = from_clip('description') or self._og_search_description(webpage)
-
- duration = float_or_none(from_clip('duration') or self._html_search_meta(
- 'weibo:audio:duration', webpage))
-
- uploader = from_clip('author') or self._og_search_property(
- 'audio:artist', webpage, 'uploader', fatal=False)
- uploader_url = from_clip('author_url') or self._html_search_meta(
- 'audioboo:channel', webpage, 'uploader url')
-
- return {
- 'id': video_id,
- 'url': audio_url,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'uploader': uploader,
- 'uploader_url': uploader_url,
- }
diff --git a/youtube_dl/extractor/azmedien.py b/youtube_dl/extractor/azmedien.py
deleted file mode 100644
index fcbdc71b9..000000000
--- a/youtube_dl/extractor/azmedien.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import re
-
-from .common import InfoExtractor
-from .kaltura import KalturaIE
-
-
-class AZMedienIE(InfoExtractor):
- IE_DESC = 'AZ Medien videos'
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?
- (?P<host>
- telezueri\.ch|
- telebaern\.tv|
- telem1\.ch
- )/
- [^/]+/
- (?P<id>
- [^/]+-(?P<article_id>\d+)
- )
- (?:
- \#video=
- (?P<kaltura_id>
- [_0-9a-z]+
- )
- )?
- '''
-
- _TESTS = [{
- 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
- 'info_dict': {
- 'id': '1_anruz3wy',
- 'ext': 'mp4',
- 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
- 'uploader_id': 'TVOnline',
- 'upload_date': '20180930',
- 'timestamp': 1538328802,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
- 'only_matching': True
- }]
-
- _PARTNER_ID = '1719221'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host')
- video_id = mobj.group('id')
- entry_id = mobj.group('kaltura_id')
-
- if not entry_id:
- api_url = 'https://www.%s/api/pub/gql/%s' % (host, host.split('.')[0])
- payload = {
- 'query': '''query VideoContext($articleId: ID!) {
- article: node(id: $articleId) {
- ... on Article {
- mainAssetRelation {
- asset {
- ... on VideoAsset {
- kalturaId
- }
- }
- }
- }
- }
- }''',
- 'variables': {'articleId': 'Article:%s' % mobj.group('article_id')},
- }
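- # e.g. for article id 133214569 (first test above) this POSTs the
- # query with variables {"articleId": "Article:133214569"} and reads
- # the Kaltura entry id from
- # data.article.mainAssetRelation.asset.kalturaId.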
- json_data = self._download_json(
- api_url, video_id, headers={
- 'Content-Type': 'application/json',
- },
- data=json.dumps(payload).encode())
- entry_id = json_data['data']['article']['mainAssetRelation']['asset']['kalturaId']
-
- return self.url_result(
- 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
- ie=KalturaIE.ie_key(), video_id=entry_id)
diff --git a/youtube_dl/extractor/bambuser.py b/youtube_dl/extractor/bambuser.py
deleted file mode 100644
index 4400ff9c1..000000000
--- a/youtube_dl/extractor/bambuser.py
+++ /dev/null
@@ -1,142 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import itertools
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class BambuserIE(InfoExtractor):
- IE_NAME = 'bambuser'
- _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
- _API_KEY = '005f64509e19a868399060af746a00aa'
- _LOGIN_URL = 'https://bambuser.com/user'
- _NETRC_MACHINE = 'bambuser'
-
- _TEST = {
- 'url': 'http://bambuser.com/v/4050584',
- # MD5 seems to be flaky, see https://travis-ci.org/ytdl-org/youtube-dl/jobs/14051016#L388
- # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
- 'info_dict': {
- 'id': '4050584',
- 'ext': 'flv',
- 'title': 'Education engineering days - lightning talks',
- 'duration': 3741,
- 'uploader': 'pixelversity',
- 'uploader_id': '344706',
- 'timestamp': 1382976692,
- 'upload_date': '20131028',
- 'view_count': int,
- },
- 'params': {
- # The server doesn't respect the 'Range' header and would serve the whole
- # video, which caused the Travis builds to fail:
- # https://travis-ci.org/ytdl-org/youtube-dl/jobs/14493845#L59
- 'skip_download': True,
- },
- }
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_form = {
- 'form_id': 'user_login',
- 'op': 'Log in',
- 'name': username,
- 'pass': password,
- }
-
- request = sanitized_Request(
- self._LOGIN_URL, urlencode_postdata(login_form))
- request.add_header('Referer', self._LOGIN_URL)
- response = self._download_webpage(
- request, None, 'Logging in')
-
- login_error = self._html_search_regex(
- r'(?s)<div class="messages error">(.+?)</div>',
- response, 'login error', default=None)
- if login_error:
- raise ExtractorError(
- 'Unable to login: %s' % login_error, expected=True)
-
- def _real_initialize(self):
- self._login()
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- info = self._download_json(
- 'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
- % (self._API_KEY, video_id), video_id)
-
- error = info.get('error')
- if error:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error), expected=True)
-
- result = info['result']
-
- return {
- 'id': video_id,
- 'title': result['title'],
- 'url': result['url'],
- 'thumbnail': result.get('preview'),
- 'duration': int_or_none(result.get('length')),
- 'uploader': result.get('username'),
- 'uploader_id': compat_str(result.get('owner', {}).get('uid')),
- 'timestamp': int_or_none(result.get('created')),
- 'fps': float_or_none(result.get('framerate')),
- 'view_count': int_or_none(result.get('views_total')),
- 'comment_count': int_or_none(result.get('comment_count')),
- }
-
-
-class BambuserChannelIE(InfoExtractor):
- IE_NAME = 'bambuser:channel'
- _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
- # The maximum number we can get with each request
- _STEP = 50
- _TEST = {
- 'url': 'http://bambuser.com/channel/pixelversity',
- 'info_dict': {
- 'title': 'pixelversity',
- },
- 'playlist_mincount': 60,
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user = mobj.group('user')
- urls = []
- last_id = ''
- for i in itertools.count(1):
- req_url = (
- 'http://bambuser.com/xhr-api/index.php?username={user}'
- '&sort=created&access_mode=0%2C1%2C2&limit={count}'
- '&method=broadcast&format=json&vid_older_than={last}'
- ).format(user=user, count=self._STEP, last=last_id)
- req = sanitized_Request(req_url)
- # Without setting this header, we wouldn't get any result
- req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
- data = self._download_json(
- req, user, 'Downloading page %d' % i)
- results = data['result']
- if not results:
- break
- last_id = results[-1]['vid']
- urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
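- # Cursor pagination sketch: the first request sends an empty
- # vid_older_than, each following one sends the last vid seen
- # (e.g. vid_older_than=4050584), until the API returns an empty
- # result list.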
-
- return {
- '_type': 'playlist',
- 'title': user,
- 'entries': urls,
- }
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
deleted file mode 100644
index f14b407dc..000000000
--- a/youtube_dl/extractor/bandcamp.py
+++ /dev/null
@@ -1,417 +0,0 @@
-from __future__ import unicode_literals
-
-import random
-import re
-import time
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- KNOWN_EXTENSIONS,
- parse_filesize,
- str_or_none,
- try_get,
- unescapeHTML,
- update_url_query,
- unified_strdate,
- unified_timestamp,
- url_or_none,
-)
-
-
-class BandcampIE(InfoExtractor):
- _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://youtube-dl.bandcamp.com/track/youtube-dl-test-song',
- 'md5': 'c557841d5e50261777a6585648adf439',
- 'info_dict': {
- 'id': '1812978515',
- 'ext': 'mp3',
- 'title': "youtube-dl \"'/\\\u00e4\u21ad - youtube-dl test song \"'/\\\u00e4\u21ad",
- 'duration': 9.8485,
- },
- '_skip': 'There is a limit of 200 free downloads / month for the test song'
- }, {
- # free download
- 'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
- 'md5': '853e35bf34aa1d6fe2615ae612564b36',
- 'info_dict': {
- 'id': '2650410135',
- 'ext': 'aiff',
- 'title': 'Ben Prunty - Lanius (Battle)',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Ben Prunty',
- 'timestamp': 1396508491,
- 'upload_date': '20140403',
- 'release_date': '20140403',
- 'duration': 260.877,
- 'track': 'Lanius (Battle)',
- 'track_number': 1,
- 'track_id': '2650410135',
- 'artist': 'Ben Prunty',
- 'album': 'FTL: Advanced Edition Soundtrack',
- },
- }, {
- # no free download, mp3 128
- 'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
- 'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
- 'info_dict': {
- 'id': '2584466013',
- 'ext': 'mp3',
- 'title': 'Mastodon - Hail to Fire',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Mastodon',
- 'timestamp': 1322005399,
- 'upload_date': '20111122',
- 'release_date': '20040207',
- 'duration': 120.79,
- 'track': 'Hail to Fire',
- 'track_number': 5,
- 'track_id': '2584466013',
- 'artist': 'Mastodon',
- 'album': 'Call of the Mastodon',
- },
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- title = mobj.group('title')
- webpage = self._download_webpage(url, title)
- thumbnail = self._html_search_meta('og:image', webpage, default=None)
-
- track_id = None
- track = None
- track_number = None
- duration = None
-
- formats = []
- track_info = self._parse_json(
- self._search_regex(
- r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
- webpage, 'track info', default='{}'), title)
- if track_info:
- file_ = track_info.get('file')
- if isinstance(file_, dict):
- for format_id, format_url in file_.items():
- if not url_or_none(format_url):
- continue
- ext, abr_str = format_id.split('-', 1)
- formats.append({
- 'format_id': format_id,
- 'url': self._proto_relative_url(format_url, 'http:'),
- 'ext': ext,
- 'vcodec': 'none',
- 'acodec': ext,
- 'abr': int_or_none(abr_str),
- })
- track = track_info.get('title')
- track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
- track_number = int_or_none(track_info.get('track_num'))
- duration = float_or_none(track_info.get('duration'))
-
- def extract(key):
- return self._search_regex(
- r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
- webpage, key, default=None, group='value')
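- # e.g. extract('artist') matches page JS such as artist: "Ben Prunty"
- # (or artist: 'Ben Prunty') and returns the quoted value.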
-
- artist = extract('artist')
- album = extract('album_title')
- timestamp = unified_timestamp(
- extract('publish_date') or extract('album_publish_date'))
- release_date = unified_strdate(extract('album_release_date'))
-
- download_link = self._search_regex(
- r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'download link', default=None, group='url')
- if download_link:
- track_id = self._search_regex(
- r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
- webpage, 'track id')
-
- download_webpage = self._download_webpage(
- download_link, track_id, 'Downloading free downloads page')
-
- blob = self._parse_json(
- self._search_regex(
- r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
- 'blob', group='blob'),
- track_id, transform_source=unescapeHTML)
-
- info = try_get(
- blob, (lambda x: x['digital_items'][0],
- lambda x: x['download_items'][0]), dict)
- if info:
- downloads = info.get('downloads')
- if isinstance(downloads, dict):
- if not track:
- track = info.get('title')
- if not artist:
- artist = info.get('artist')
- if not thumbnail:
- thumbnail = info.get('thumb_url')
-
- download_formats = {}
- download_formats_list = blob.get('download_formats')
- if isinstance(download_formats_list, list):
- for f in blob['download_formats']:
- name, ext = f.get('name'), f.get('file_extension')
- if all(isinstance(x, compat_str) for x in (name, ext)):
- download_formats[name] = ext.strip('.')
-
- for format_id, f in downloads.items():
- format_url = f.get('url')
- if not format_url:
- continue
- # The stat URL generation algorithm is reverse-engineered from
- # download_*_bundle_*.js
- stat_url = update_url_query(
- format_url.replace('/download/', '/statdownload/'), {
- '.rand': int(time.time() * 1000 * random.random()),
- })
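- # e.g. (hypothetical URL) https://bandcamp.com/download/track?id=1
- # becomes https://bandcamp.com/statdownload/track?id=1&.rand=<int>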
- format_id = f.get('encoding_name') or format_id
- stat = self._download_json(
- stat_url, track_id, 'Downloading %s JSON' % format_id,
- transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
- fatal=False)
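- # The transform_source above keeps only the outermost {...} of the
- # response, discarding whatever JS wrapper surrounds the JSON.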
- if not stat:
- continue
- retry_url = url_or_none(stat.get('retry_url'))
- if not retry_url:
- continue
- formats.append({
- 'url': self._proto_relative_url(retry_url, 'http:'),
- 'ext': download_formats.get(format_id),
- 'format_id': format_id,
- 'format_note': f.get('description'),
- 'filesize': parse_filesize(f.get('size_mb')),
- 'vcodec': 'none',
- })
-
- self._sort_formats(formats)
-
- title = '%s - %s' % (artist, track) if artist else track
-
- if not duration:
- duration = float_or_none(self._html_search_meta(
- 'duration', webpage, default=None))
-
- return {
- 'id': track_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'uploader': artist,
- 'timestamp': timestamp,
- 'release_date': release_date,
- 'duration': duration,
- 'track': track,
- 'track_number': track_number,
- 'track_id': track_id,
- 'artist': artist,
- 'album': album,
- 'formats': formats,
- }
-
-
-class BandcampAlbumIE(InfoExtractor):
- IE_NAME = 'Bandcamp:album'
- _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
-
- _TESTS = [{
- 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
- 'playlist': [
- {
- 'md5': '39bc1eded3476e927c724321ddf116cf',
- 'info_dict': {
- 'id': '1353101989',
- 'ext': 'mp3',
- 'title': 'Intro',
- }
- },
- {
- 'md5': '1a2c32e2691474643e912cc6cd4bffaa',
- 'info_dict': {
- 'id': '38097443',
- 'ext': 'mp3',
- 'title': 'Kero One - Keep It Alive (Blazo remix)',
- }
- },
- ],
- 'info_dict': {
- 'title': 'Jazz Format Mixtape vol.1',
- 'id': 'jazz-format-mixtape-vol-1',
- 'uploader_id': 'blazo',
- },
- 'params': {
- 'playlistend': 2
- },
- 'skip': 'Bandcamp imposes download limits.'
- }, {
- 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
- 'info_dict': {
- 'title': 'Hierophany of the Open Grave',
- 'uploader_id': 'nightbringer',
- 'id': 'hierophany-of-the-open-grave',
- },
- 'playlist_mincount': 9,
- }, {
- 'url': 'http://dotscale.bandcamp.com',
- 'info_dict': {
- 'title': 'Loom',
- 'id': 'dotscale',
- 'uploader_id': 'dotscale',
- },
- 'playlist_mincount': 7,
- }, {
- # with escaped quote in title
- 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
- 'info_dict': {
- 'title': '"Entropy" EP',
- 'uploader_id': 'jstrecords',
- 'id': 'entropy-ep',
- },
- 'playlist_mincount': 3,
- }, {
- # not all tracks have songs
- 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
- 'info_dict': {
- 'id': 'we-are-the-plague',
- 'title': 'WE ARE THE PLAGUE',
- 'uploader_id': 'insulters',
- },
- 'playlist_count': 2,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (False
- if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
- else super(BandcampAlbumIE, cls).suitable(url))
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- uploader_id = mobj.group('subdomain')
- album_id = mobj.group('album_id')
- playlist_id = album_id or uploader_id
- webpage = self._download_webpage(url, playlist_id)
- track_elements = re.findall(
- r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
- if not track_elements:
- raise ExtractorError('The page doesn\'t contain any tracks')
- # Only tracks with duration info have songs
- entries = [
- self.url_result(
- compat_urlparse.urljoin(url, t_path),
- ie=BandcampIE.ie_key(),
- video_title=self._search_regex(
- r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
- elem_content, 'track title', fatal=False))
- for elem_content, t_path in track_elements
- if self._html_search_meta('duration', elem_content, default=None)]
-
- title = self._html_search_regex(
- r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
- webpage, 'title', fatal=False)
- if title:
- title = title.replace(r'\"', '"')
- return {
- '_type': 'playlist',
- 'uploader_id': uploader_id,
- 'id': playlist_id,
- 'title': title,
- 'entries': entries,
- }
-
-
-class BandcampWeeklyIE(InfoExtractor):
- IE_NAME = 'Bandcamp:weekly'
- _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://bandcamp.com/?show=224',
- 'md5': 'b00df799c733cf7e0c567ed187dea0fd',
- 'info_dict': {
- 'id': '224',
- 'ext': 'opus',
- 'title': 'BC Weekly April 4th 2017 - Magic Moments',
- 'description': 'md5:5d48150916e8e02d030623a48512c874',
- 'duration': 5829.77,
- 'release_date': '20170404',
- 'series': 'Bandcamp Weekly',
- 'episode': 'Magic Moments',
- 'episode_number': 208,
- 'episode_id': '224',
- }
- }, {
- 'url': 'https://bandcamp.com/?blah/blah@&show=228',
- 'only_matching': True
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- blob = self._parse_json(
- self._search_regex(
- r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
- 'blob', group='blob'),
- video_id, transform_source=unescapeHTML)
-
- show = blob['bcw_show']
-
- # This is desired because any invalid show id redirects to `bandcamp.com`,
- # which happens to expose the latest Bandcamp Weekly episode.
- show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
-
- formats = []
- for format_id, format_url in show['audio_stream'].items():
- if not url_or_none(format_url):
- continue
- for known_ext in KNOWN_EXTENSIONS:
- if known_ext in format_id:
- ext = known_ext
- break
- else:
- ext = None
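- # e.g. a format_id such as 'opus-lo' yields ext 'opus', while an
- # unrecognised id leaves ext as None (the key names are examples).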
- formats.append({
- 'format_id': format_id,
- 'url': format_url,
- 'ext': ext,
- 'vcodec': 'none',
- })
- self._sort_formats(formats)
-
- title = show.get('audio_title') or 'Bandcamp Weekly'
- subtitle = show.get('subtitle')
- if subtitle:
- title += ' - %s' % subtitle
-
- episode_number = None
- seq = blob.get('bcw_seq')
-
- if seq and isinstance(seq, list):
- try:
- episode_number = next(
- int_or_none(e.get('episode_number'))
- for e in seq
- if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
- except StopIteration:
- pass
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': show.get('desc') or show.get('short_desc'),
- 'duration': float_or_none(show.get('audio_duration')),
- 'is_live': False,
- 'release_date': unified_strdate(show.get('published_date')),
- 'series': 'Bandcamp Weekly',
- 'episode': show.get('subtitle'),
- 'episode_number': episode_number,
- 'episode_id': compat_str(video_id),
- 'formats': formats
- }
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
deleted file mode 100644
index 901c5a54f..000000000
--- a/youtube_dl/extractor/bbc.py
+++ /dev/null
@@ -1,1359 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- dict_get,
- ExtractorError,
- float_or_none,
- get_element_by_class,
- int_or_none,
- js_to_json,
- parse_duration,
- parse_iso8601,
- try_get,
- unescapeHTML,
- url_or_none,
- urlencode_postdata,
- urljoin,
-)
-from ..compat import (
- compat_etree_Element,
- compat_HTTPError,
- compat_urlparse,
-)
-
-
-class BBCCoUkIE(InfoExtractor):
- IE_NAME = 'bbc.co.uk'
- IE_DESC = 'BBC iPlayer'
- _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?bbc\.co\.uk/
- (?:
- programmes/(?!articles/)|
- iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
- music/(?:clips|audiovideo/popular)[/#]|
- radio/player/|
- sounds/play/|
- events/[^/]+/play/[^/]+/
- )
- (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
- ''' % _ID_REGEX
-
- _LOGIN_URL = 'https://account.bbc.com/signin'
- _NETRC_MACHINE = 'bbc'
-
- _MEDIASELECTOR_URLS = [
- # Provides HQ HLS streams with even better quality than the pc mediaset, but
- # fails with geolocation in some cases even when the programme is not geo
- # restricted at all (e.g. http://www.bbc.co.uk/programmes/b06bp7lf). May also
- # fail with selectionunavailable.
- 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
- 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
- ]
-
- _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
- _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
-
- _NAMESPACES = (
- _MEDIASELECTION_NS,
- _EMP_PLAYLIST_NS,
- )
-
- _TESTS = [
- {
- 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
- 'info_dict': {
- 'id': 'b039d07m',
- 'ext': 'flv',
- 'title': 'Kaleidoscope, Leonard Cohen',
- 'description': 'The Canadian poet and songwriter reflects on his musical career.',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- },
- {
- 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
- 'info_dict': {
- 'id': 'b00yng1d',
- 'ext': 'flv',
- 'title': 'The Man in Black: Series 3: The Printed Name',
- 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
- 'duration': 1800,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Episode is no longer available on BBC iPlayer Radio',
- },
- {
- 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
- 'info_dict': {
- 'id': 'b00yng1d',
- 'ext': 'flv',
- 'title': 'The Voice UK: Series 3: Blind Auditions 5',
- 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
- 'duration': 5100,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
- },
- {
- 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
- 'info_dict': {
- 'id': 'b03k3pb7',
- 'ext': 'flv',
- 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
- 'description': '2. Invasion',
- 'duration': 3600,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
- }, {
- 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
- 'info_dict': {
- 'id': 'b04v209v',
- 'ext': 'flv',
- 'title': 'Pete Tong, The Essential New Tune Special',
- 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
- 'duration': 10800,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Episode is no longer available on BBC iPlayer Radio',
- }, {
- 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
- 'note': 'Audio',
- 'info_dict': {
- 'id': 'p022h44j',
- 'ext': 'flv',
- 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
- 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
- 'duration': 227,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
- 'note': 'Video',
- 'info_dict': {
- 'id': 'p025c103',
- 'ext': 'flv',
- 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
- 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
- 'duration': 226,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
- 'info_dict': {
- 'id': 'p02n76xf',
- 'ext': 'flv',
- 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
- 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
- 'duration': 3540,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'geolocation',
- }, {
- 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
- 'info_dict': {
- 'id': 'b05zmgw1',
- 'ext': 'flv',
- 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
- 'title': 'Royal Academy Summer Exhibition',
- 'duration': 3540,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'geolocation',
- }, {
- # iptv-all mediaset fails with geolocation; however, there is no geo
- # restriction for this programme at all
- 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
- 'info_dict': {
- 'id': 'b06rkms3',
- 'ext': 'flv',
- 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
- 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Now it\'s really geo-restricted',
- }, {
- # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
- 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
- 'info_dict': {
- 'id': 'p028bfkj',
- 'ext': 'flv',
- 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
- 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
- 'note': 'Audio',
- 'info_dict': {
- 'id': 'm0007jz9',
- 'ext': 'mp4',
- 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
- 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
- 'duration': 9840,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
- 'only_matching': True,
- }, {
- 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
- 'only_matching': True,
- }, {
- 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
- 'only_matching': True,
- }, {
- 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
- 'only_matching': True,
- }, {
- 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
- 'only_matching': True,
- }, {
- 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
- 'only_matching': True,
- }, {
- 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
- 'only_matching': True,
- }, {
- 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
- 'only_matching': True,
- }]
-
- _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
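- # Illustrative only (hypothetical URL): given an HLS href such as
- # http://example.invalid/vod/abc123.ism.hlsv2.ism/master.m3u8
- # _USP_RE captures 'abc123', and _process_media_selector below rewrites the
- # href to /abc123.ism/abc123.m3u8 to probe the Unified Streaming Platform
- # variant of the manifest.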
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading signin page')
-
- login_form = self._hidden_inputs(login_page)
-
- login_form.update({
- 'username': username,
- 'password': password,
- })
-
- post_url = urljoin(self._LOGIN_URL, self._search_regex(
- r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
- 'post url', default=self._LOGIN_URL, group='url'))
-
- response, urlh = self._download_webpage_handle(
- post_url, None, 'Logging in', data=urlencode_postdata(login_form),
- headers={'Referer': self._LOGIN_URL})
-
- if self._LOGIN_URL in urlh.geturl():
- error = clean_html(get_element_by_class('form-message', response))
- if error:
- raise ExtractorError(
- 'Unable to login: %s' % error, expected=True)
- raise ExtractorError('Unable to log in')
-
- def _real_initialize(self):
- self._login()
-
- class MediaSelectionError(Exception):
- def __init__(self, id):
- self.id = id
-
- def _extract_asx_playlist(self, connection, programme_id):
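- # ASX is an XML-based Windows Media playlist; the href attribute of each
- # ./Entry/ref element points at an individual stream URL.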
- asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
- return [ref.get('href') for ref in asx.findall('./Entry/ref')]
-
- def _extract_items(self, playlist):
- return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
-
- def _findall_ns(self, element, xpath):
- elements = []
- for ns in self._NAMESPACES:
- elements.extend(element.findall(xpath % ns))
- return elements
-
- def _extract_medias(self, media_selection):
- error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
- if error is None:
- error = media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
- if error is not None:
- raise BBCCoUkIE.MediaSelectionError(error.get('id'))
- return self._findall_ns(media_selection, './{%s}media')
-
- def _extract_connections(self, media):
- return self._findall_ns(media, './{%s}connection')
-
- def _get_subtitles(self, media, programme_id):
- subtitles = {}
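- # Result shape (illustrative): {'en': [{'url': '<ttml url>', 'ext': 'ttml'}]}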
- for connection in self._extract_connections(media):
- cc_url = url_or_none(connection.get('href'))
- if not cc_url:
- continue
- captions = self._download_xml(
- cc_url, programme_id, 'Downloading captions', fatal=False)
- if not isinstance(captions, compat_etree_Element):
- continue
- lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
- subtitles[lang] = [
- {
- 'url': connection.get('href'),
- 'ext': 'ttml',
- },
- ]
- return subtitles
-
- def _raise_extractor_error(self, media_selection_error):
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
- expected=True)
-
- def _download_media_selector(self, programme_id):
- last_exception = None
- for mediaselector_url in self._MEDIASELECTOR_URLS:
- try:
- return self._download_media_selector_url(
- mediaselector_url % programme_id, programme_id)
- except BBCCoUkIE.MediaSelectionError as e:
- if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
- last_exception = e
- continue
- self._raise_extractor_error(e)
- self._raise_extractor_error(last_exception)
-
- def _download_media_selector_url(self, url, programme_id=None):
- media_selection = self._download_xml(
- url, programme_id, 'Downloading media selection XML',
- expected_status=(403, 404))
- return self._process_media_selector(media_selection, programme_id)
-
- def _process_media_selector(self, media_selection, programme_id):
- formats = []
- subtitles = None
- urls = []
-
- for media in self._extract_medias(media_selection):
- kind = media.get('kind')
- if kind in ('video', 'audio'):
- bitrate = int_or_none(media.get('bitrate'))
- encoding = media.get('encoding')
- service = media.get('service')
- width = int_or_none(media.get('width'))
- height = int_or_none(media.get('height'))
- file_size = int_or_none(media.get('media_file_size'))
- for connection in self._extract_connections(media):
- href = connection.get('href')
- if href in urls:
- continue
- if href:
- urls.append(href)
- conn_kind = connection.get('kind')
- protocol = connection.get('protocol')
- supplier = connection.get('supplier')
- transfer_format = connection.get('transferFormat')
- format_id = supplier or conn_kind or protocol
- if service:
- format_id = '%s_%s' % (service, format_id)
- # ASX playlist
- if supplier == 'asx':
- for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
- formats.append({
- 'url': ref,
- 'format_id': 'ref%s_%s' % (i, format_id),
- })
- elif transfer_format == 'dash':
- formats.extend(self._extract_mpd_formats(
- href, programme_id, mpd_id=format_id, fatal=False))
- elif transfer_format == 'hls':
- formats.extend(self._extract_m3u8_formats(
- href, programme_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id=format_id, fatal=False))
- if re.search(self._USP_RE, href):
- usp_formats = self._extract_m3u8_formats(
- re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
- programme_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id=format_id, fatal=False)
- for f in usp_formats:
- if f.get('height') and f['height'] > 720:
- continue
- formats.append(f)
- elif transfer_format == 'hds':
- formats.extend(self._extract_f4m_formats(
- href, programme_id, f4m_id=format_id, fatal=False))
- else:
- if not service and not supplier and bitrate:
- format_id += '-%d' % bitrate
- fmt = {
- 'format_id': format_id,
- 'filesize': file_size,
- }
- if kind == 'video':
- fmt.update({
- 'width': width,
- 'height': height,
- 'tbr': bitrate,
- 'vcodec': encoding,
- })
- else:
- fmt.update({
- 'abr': bitrate,
- 'acodec': encoding,
- 'vcodec': 'none',
- })
- if protocol in ('http', 'https'):
- # Direct link
- fmt.update({
- 'url': href,
- })
- elif protocol == 'rtmp':
- application = connection.get('application', 'ondemand')
- auth_string = connection.get('authString')
- identifier = connection.get('identifier')
- server = connection.get('server')
- fmt.update({
- 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
- 'play_path': identifier,
- 'app': '%s?%s' % (application, auth_string),
- 'page_url': 'http://www.bbc.co.uk',
- 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
- 'rtmp_live': False,
- 'ext': 'flv',
- })
- else:
- continue
- formats.append(fmt)
- elif kind == 'captions':
- subtitles = self.extract_subtitles(media, programme_id)
- return formats, subtitles
-
- def _download_playlist(self, playlist_id):
- try:
- playlist = self._download_json(
- 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
- playlist_id, 'Downloading playlist JSON')
-
- version = playlist.get('defaultAvailableVersion')
- if version:
- smp_config = version['smpConfig']
- title = smp_config['title']
- description = smp_config['summary']
- for item in smp_config['items']:
- kind = item['kind']
- if kind not in ('programme', 'radioProgramme'):
- continue
- programme_id = item.get('vpid')
- duration = int_or_none(item.get('duration'))
- formats, subtitles = self._download_media_selector(programme_id)
- return programme_id, title, description, duration, formats, subtitles
- except ExtractorError as ee:
- if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
- raise
-
- # fallback to legacy playlist
- return self._process_legacy_playlist(playlist_id)
-
- def _process_legacy_playlist_url(self, url, display_id):
- playlist = self._download_legacy_playlist_url(url, display_id)
- return self._extract_from_legacy_playlist(playlist, display_id)
-
- def _process_legacy_playlist(self, playlist_id):
- return self._process_legacy_playlist_url(
- 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
-
- def _download_legacy_playlist_url(self, url, playlist_id=None):
- return self._download_xml(
- url, playlist_id, 'Downloading legacy playlist XML')
-
- def _extract_from_legacy_playlist(self, playlist, playlist_id):
- no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
- if no_items is not None:
- reason = no_items.get('reason')
- if reason == 'preAvailability':
- msg = 'Episode %s is not yet available' % playlist_id
- elif reason == 'postAvailability':
- msg = 'Episode %s is no longer available' % playlist_id
- elif reason == 'noMedia':
- msg = 'Episode %s is not currently available' % playlist_id
- else:
- msg = 'Episode %s is not available: %s' % (playlist_id, reason)
- raise ExtractorError(msg, expected=True)
-
- for item in self._extract_items(playlist):
- kind = item.get('kind')
- if kind not in ('programme', 'radioProgramme'):
- continue
- title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
- description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
- description = description_el.text if description_el is not None else None
-
- def get_programme_id(item):
- def get_from_attributes(item):
- for p in ('identifier', 'group'):
- value = item.get(p)
- if value and re.match(r'^[pb][\da-z]{7}$', value):
- return value
- programme_id = get_from_attributes(item)
- if programme_id:
- return programme_id
- mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
- if mediator is not None:
- return get_from_attributes(mediator)
-
- programme_id = get_programme_id(item)
- duration = int_or_none(item.get('duration'))
-
- if programme_id:
- formats, subtitles = self._download_media_selector(programme_id)
- else:
- formats, subtitles = self._process_media_selector(item, playlist_id)
- programme_id = playlist_id
-
- return programme_id, title, description, duration, formats, subtitles
-
- def _real_extract(self, url):
- group_id = self._match_id(url)
-
- webpage = self._download_webpage(url, group_id, 'Downloading video page')
-
- error = self._search_regex(
- r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
- webpage, 'error', default=None)
- if error:
- raise ExtractorError(error, expected=True)
-
- programme_id = None
- duration = None
-
- tviplayer = self._search_regex(
- r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
- webpage, 'player', default=None)
-
- if tviplayer:
- player = self._parse_json(tviplayer, group_id).get('player', {})
- duration = int_or_none(player.get('duration'))
- programme_id = player.get('vpid')
-
- if not programme_id:
- programme_id = self._search_regex(
- r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
-
- if programme_id:
- formats, subtitles = self._download_media_selector(programme_id)
- title = self._og_search_title(webpage, default=None) or self._html_search_regex(
- (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
- r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
- description = self._search_regex(
- (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
- r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
- webpage, 'description', default=None)
- if not description:
- description = self._html_search_meta('description', webpage)
- else:
- programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
-
- self._sort_formats(formats)
-
- return {
- 'id': programme_id,
- 'title': title,
- 'description': description,
- 'thumbnail': self._og_search_thumbnail(webpage, default=None),
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class BBCIE(BBCCoUkIE):
- IE_NAME = 'bbc'
- IE_DESC = 'BBC'
- _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
-
- _MEDIASELECTOR_URLS = [
- # Provides HQ HLS streams but sometimes fails with a geolocation error
- # even when the content is not geo-restricted at all
- 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
- # Provides more formats, namely direct mp4 links, but fails with
- # notukerror for non-UK (?) users on some videos (e.g.
- # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
- 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
- # Provides fewer formats, but works everywhere for everybody (hopefully)
- 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
- ]
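- # For illustration, with a hypothetical vpid 'p0000000' the first entry
- # expands to:
- # http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/p0000000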
-
- _TESTS = [{
- # article with multiple videos embedded with data-playable containing vpids
- 'url': 'http://www.bbc.com/news/world-europe-32668511',
- 'info_dict': {
- 'id': 'world-europe-32668511',
- 'title': 'Russia stages massive WW2 parade',
- 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
- },
- 'playlist_count': 2,
- }, {
- # article with multiple videos embedded with data-playable (more videos)
- 'url': 'http://www.bbc.com/news/business-28299555',
- 'info_dict': {
- 'id': 'business-28299555',
- 'title': 'Farnborough Airshow: Video highlights',
- 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
- },
- 'playlist_count': 9,
- 'skip': 'Save time',
- }, {
- # article with multiple videos embedded with `new SMP()`
- # broken
- 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
- 'info_dict': {
- 'id': '3662a707-0af9-3149-963f-47bea720b460',
- 'title': 'BUGGER',
- },
- 'playlist_count': 18,
- }, {
- # single video embedded with data-playable containing vpid
- 'url': 'http://www.bbc.com/news/world-europe-32041533',
- 'info_dict': {
- 'id': 'p02mprgb',
- 'ext': 'mp4',
- 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
- 'description': 'md5:2868290467291b37feda7863f7a83f54',
- 'duration': 47,
- 'timestamp': 1427219242,
- 'upload_date': '20150324',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- # article with single video embedded with data-playable containing XML playlist
- # with direct video links as progressiveDownloadUrl (for now these are extracted)
- # and playlist with f4m and m3u8 as streamingUrl
- 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
- 'info_dict': {
- 'id': '150615_telabyad_kentin_cogu',
- 'ext': 'mp4',
- 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
- 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
- 'timestamp': 1434397334,
- 'upload_date': '20150615',
- },
- 'params': {
- 'skip_download': True,
- }
- }, {
- # single video embedded with data-playable containing XML playlists (regional section)
- 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
- 'info_dict': {
- 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
- 'ext': 'mp4',
- 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
- 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
- 'timestamp': 1434713142,
- 'upload_date': '20150619',
- },
- 'params': {
- 'skip_download': True,
- }
- }, {
- # single video from video playlist embedded with vxp-playlist-data JSON
- 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
- 'info_dict': {
- 'id': 'p02w6qjc',
- 'ext': 'mp4',
- 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
- 'duration': 56,
- 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
- },
- 'params': {
- 'skip_download': True,
- }
- }, {
- # single video story with digitalData
- 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
- 'info_dict': {
- 'id': 'p02q6gc4',
- 'ext': 'flv',
- 'title': 'Sri Lanka’s spicy secret',
- 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
- 'timestamp': 1437674293,
- 'upload_date': '20150723',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- # single video story without digitalData
- 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
- 'info_dict': {
- 'id': 'p018zqqg',
- 'ext': 'mp4',
- 'title': 'Hyundai Santa Fe Sport: Rock star',
- 'description': 'md5:b042a26142c4154a6e472933cf20793d',
- 'timestamp': 1415867444,
- 'upload_date': '20141113',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- # single video embedded with Morph
- 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
- 'info_dict': {
- 'id': 'p041vhd0',
- 'ext': 'mp4',
- 'title': "Nigeria v Japan - Men's First Round",
- 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
- 'duration': 7980,
- 'uploader': 'BBC Sport',
- 'uploader_id': 'bbc_sport',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'Georestricted to UK',
- }, {
- # single video with playlist.sxml URL in playlist param
- 'url': 'http://www.bbc.com/sport/0/football/33653409',
- 'info_dict': {
- 'id': 'p02xycnp',
- 'ext': 'mp4',
- 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
- 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
- 'duration': 140,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- # article with multiple videos embedded with playlist.sxml in playlist param
- 'url': 'http://www.bbc.com/sport/0/football/34475836',
- 'info_dict': {
- 'id': '34475836',
- 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
- 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
- },
- 'playlist_count': 3,
- }, {
- # school report article with single video
- 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
- 'info_dict': {
- 'id': '35744779',
- 'title': 'School which breaks down barriers in Jerusalem',
- },
- 'playlist_count': 1,
- }, {
- # single video with playlist URL from weather section
- 'url': 'http://www.bbc.com/weather/features/33601775',
- 'only_matching': True,
- }, {
- # custom redirection to www.bbc.com
- 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
- 'only_matching': True,
- }, {
- # single video article embedded with data-media-vpid
- 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
- 'only_matching': True,
- }, {
- 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
- 'info_dict': {
- 'id': 'p06556y7',
- 'ext': 'mp4',
- 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
- 'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
- },
- 'params': {
- 'skip_download': True,
- }
- }, {
- # window.__PRELOADED_STATE__
- 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
- 'info_dict': {
- 'id': 'b0b9z4vz',
- 'ext': 'mp4',
- 'title': 'Prom 6: An American in Paris and Turangalila',
- 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
- 'uploader': 'Radio 3',
- 'uploader_id': 'bbc_radio_three',
- },
- }, {
- 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
- 'info_dict': {
- 'id': 'p06w9tws',
- 'ext': 'mp4',
- 'title': 'md5:2fabf12a726603193a2879a055f72514',
- 'description': 'Learn English words and phrases from this story',
- },
- 'add_ie': [BBCCoUkIE.ie_key()],
- }]
-
- @classmethod
- def suitable(cls, url):
- EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
- return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
- else super(BBCIE, cls).suitable(url))
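- # BBCIE's _VALID_URL is deliberately broad; deferring to the more specific
- # bbc.co.uk extractors here keeps this generic one from shadowing them.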
-
- def _extract_from_media_meta(self, media_meta, video_id):
- # Direct links to media in media metadata (e.g.
- # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
- # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
- source_files = media_meta.get('sourceFiles')
- if source_files:
- return [{
- 'url': f['url'],
- 'format_id': format_id,
- 'ext': f.get('encoding'),
- 'tbr': float_or_none(f.get('bitrate'), 1000),
- 'filesize': int_or_none(f.get('filesize')),
- } for format_id, f in source_files.items() if f.get('url')], []
-
- programme_id = media_meta.get('externalId')
- if programme_id:
- return self._download_media_selector(programme_id)
-
- # Process playlist.sxml as legacy playlist
- href = media_meta.get('href')
- if href:
- playlist = self._download_legacy_playlist_url(href)
- _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
- return formats, subtitles
-
- return [], []
-
- def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
- programme_id, title, description, duration, formats, subtitles = \
- self._process_legacy_playlist_url(url, playlist_id)
- self._sort_formats(formats)
- return {
- 'id': programme_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
- timestamp = json_ld_info.get('timestamp')
-
- playlist_title = json_ld_info.get('title')
- if not playlist_title:
- playlist_title = self._og_search_title(
- webpage, default=None) or self._html_search_regex(
- r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
- if playlist_title:
- playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
-
- playlist_description = json_ld_info.get(
- 'description') or self._og_search_description(webpage, default=None)
-
- if not timestamp:
- timestamp = parse_iso8601(self._search_regex(
- [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
- r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
- r'"datePublished":\s*"([^"]+)'],
- webpage, 'date', default=None))
-
- entries = []
-
- # article with multiple videos embedded with playlist.sxml (e.g.
- # http://www.bbc.com/sport/0/football/34475836)
- playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
- playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
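- # The two patterns above match markup like (illustrative):
- # <param name="playlist" value="http://example.invalid/xyz/playlist.sxml">
- # <div data-media-id="http://example.invalid/xyz/playlist.sxml">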
- if playlists:
- entries = [
- self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
- for playlist_url in playlists]
-
- # news article with multiple videos embedded with data-playable
- data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
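- # data-playable attributes carry HTML-escaped JSON, e.g. (illustrative):
- # data-playable='{"settings": {"playlistObject": {"items": [{"vpid": "p0000000"}]}}}'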
- if data_playables:
- for _, data_playable_json in data_playables:
- data_playable = self._parse_json(
- unescapeHTML(data_playable_json), playlist_id, fatal=False)
- if not data_playable:
- continue
- settings = data_playable.get('settings', {})
- if settings:
- # data-playable with video vpid in settings.playlistObject.items (e.g.
- # http://www.bbc.com/news/world-us-canada-34473351)
- playlist_object = settings.get('playlistObject', {})
- if playlist_object:
- items = playlist_object.get('items')
- if items and isinstance(items, list):
- title = playlist_object['title']
- description = playlist_object.get('summary')
- duration = int_or_none(items[0].get('duration'))
- programme_id = items[0].get('vpid')
- formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
- entries.append({
- 'id': programme_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- })
- else:
- # data-playable without vpid but with playlist.sxml URLs
- # in otherSettings.playlist (e.g.
- # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
- playlist = data_playable.get('otherSettings', {}).get('playlist', {})
- if playlist:
- entry = None
- for key in ('streaming', 'progressiveDownload'):
- playlist_url = playlist.get('%sUrl' % key)
- if not playlist_url:
- continue
- try:
- info = self._extract_from_playlist_sxml(
- playlist_url, playlist_id, timestamp)
- if not entry:
- entry = info
- else:
- entry['title'] = info['title']
- entry['formats'].extend(info['formats'])
- except Exception as e:
- # Some playlist URLs may fail with HTTP 500 while
- # others work fine (e.g.
- # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
- continue
- raise
- if entry:
- self._sort_formats(entry['formats'])
- entries.append(entry)
-
- if entries:
- return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
-
- # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
- group_id = self._search_regex(
- r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
- webpage, 'group id', default=None)
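- # Matches markup like (illustrative):
- # <div class="video" data-pid="p06w9tws">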
- if group_id:
- return self.url_result(
- 'https://www.bbc.co.uk/programmes/%s' % group_id,
- ie=BBCCoUkIE.ie_key())
-
- # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
- programme_id = self._search_regex(
- [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
- r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
- r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
- webpage, 'vpid', default=None)
-
- if programme_id:
- formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
- # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
- digital_data = self._parse_json(
- self._search_regex(
- r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
- programme_id, fatal=False)
- page_info = digital_data.get('page', {}).get('pageInfo', {})
- title = page_info.get('pageName') or self._og_search_title(webpage)
- description = page_info.get('description') or self._og_search_description(webpage)
- timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
- return {
- 'id': programme_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
- # Several setPayload calls may be present, but the video seems
- # to always be related to the first one
- morph_payload = self._parse_json(
- self._search_regex(
- r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
- webpage, 'morph payload', default='{}'),
- playlist_id, fatal=False)
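- # Matches inline JS of the form (illustrative):
- # Morph.setPayload('/some/path', {"body": {"components": [
- # {"props": {"leadMedia": {"identifiers": {"vpid": "p0000000"}}}}]}});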
- if morph_payload:
- components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
- for component in components:
- if not isinstance(component, dict):
- continue
- lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
- if not lead_media:
- continue
- identifiers = lead_media.get('identifiers')
- if not identifiers or not isinstance(identifiers, dict):
- continue
- programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
- if not programme_id:
- continue
- title = lead_media.get('title') or self._og_search_title(webpage)
- formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
- description = lead_media.get('summary')
- uploader = lead_media.get('masterBrand')
- uploader_id = lead_media.get('mid')
- duration = None
- duration_d = lead_media.get('duration')
- if isinstance(duration_d, dict):
- duration = parse_duration(dict_get(
- duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
- return {
- 'id': programme_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- preload_state = self._parse_json(self._search_regex(
- r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
- 'preload state', default='{}'), playlist_id, fatal=False)
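- # Matches inline JS of the form (illustrative):
- # window.__PRELOADED_STATE__ = {"programmes": {"current":
- # {"id": "p0000000", "type": "playable_item", ...}}};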
- if preload_state:
- current_programme = preload_state.get('programmes', {}).get('current') or {}
- programme_id = current_programme.get('id')
- if current_programme and programme_id and current_programme.get('type') == 'playable_item':
- title = current_programme.get('titles', {}).get('tertiary') or playlist_title
- formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
- synopses = current_programme.get('synopses') or {}
- network = current_programme.get('network') or {}
- duration = int_or_none(
- current_programme.get('duration', {}).get('value'))
- thumbnail = None
- image_url = current_programme.get('image_url')
- if image_url:
- thumbnail = image_url.replace('{recipe}', '1920x1920')
- return {
- 'id': programme_id,
- 'title': title,
- 'description': dict_get(synopses, ('long', 'medium', 'short')),
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'uploader': network.get('short_title'),
- 'uploader_id': network.get('id'),
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- bbc3_config = self._parse_json(
- self._search_regex(
- r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
- 'bbcthree config', default='{}'),
- playlist_id, transform_source=js_to_json, fatal=False)
- if bbc3_config:
- bbc3_playlist = try_get(
- bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
- dict)
- if bbc3_playlist:
- playlist_title = bbc3_playlist.get('title') or playlist_title
- thumbnail = bbc3_playlist.get('holdingImageURL')
- entries = []
- for bbc3_item in bbc3_playlist['items']:
- programme_id = bbc3_item.get('versionID')
- if not programme_id:
- continue
- formats, subtitles = self._download_media_selector(programme_id)
- self._sort_formats(formats)
- entries.append({
- 'id': programme_id,
- 'title': playlist_title,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'formats': formats,
- 'subtitles': subtitles,
- })
- return self.playlist_result(
- entries, playlist_id, playlist_title, playlist_description)
-
- def extract_all(pattern):
- return list(filter(None, map(
- lambda s: self._parse_json(s, playlist_id, fatal=False),
- re.findall(pattern, webpage))))
-
- # Multiple video article (e.g.
- # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
- EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
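- # extract_all() pulls the JSON argument out of each inline
- # 'new SMP({...})' player instantiation; EMBED_URL then validates
- # the playerSettings.externalEmbedUrl found inside it.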
- entries = []
- for match in extract_all(r'new\s+SMP\(({.+?})\)'):
- embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
- if embed_url and re.match(EMBED_URL, embed_url):
- entries.append(embed_url)
- entries.extend(re.findall(
- r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
- if entries:
- return self.playlist_result(
- [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
- playlist_id, playlist_title, playlist_description)
-
- # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
- medias = extract_all(r"data-media-meta='({[^']+})'")
-
- if not medias:
- # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
- media_asset = self._search_regex(
- r'mediaAssetPage\.init\(\s*({.+?}), "/',
- webpage, 'media asset', default=None)
- if media_asset:
- media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False)
- medias = []
- for video in media_asset_page.get('videos', {}).values():
- medias.extend(video.values())
-
- if not medias:
- # Multiple video playlist with single `now playing` entry (e.g.
- # http://www.bbc.com/news/video_and_audio/must_see/33767813)
- vxp_playlist = self._parse_json(
- self._search_regex(
- r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
- webpage, 'playlist data'),
- playlist_id)
- playlist_medias = []
- for item in vxp_playlist:
- media = item.get('media')
- if not media:
- continue
- playlist_medias.append(media)
- # Download a single video if a media entry's asset id matches
- # the video id from the URL
- if item.get('advert', {}).get('assetId') == playlist_id:
- medias = [media]
- break
- # Fallback to the whole playlist
- if not medias:
- medias = playlist_medias
-
- entries = []
- for num, media_meta in enumerate(medias, start=1):
- formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
- if not formats:
- continue
- self._sort_formats(formats)
-
- video_id = media_meta.get('externalId')
- if not video_id:
- video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
-
- title = media_meta.get('caption')
- if not title:
- title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
-
- duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
-
- images = []
- for image in media_meta.get('images', {}).values():
- images.extend(image.values())
- if 'image' in media_meta:
- images.append(media_meta['image'])
-
- thumbnails = [{
- 'url': image.get('href'),
- 'width': int_or_none(image.get('width')),
- 'height': int_or_none(image.get('height')),
- } for image in images]
-
- entries.append({
- 'id': video_id,
- 'title': title,
- 'thumbnails': thumbnails,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- 'subtitles': subtitles,
- })
-
- return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
-
-
-class BBCCoUkArticleIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
- IE_NAME = 'bbc.co.uk:article'
- IE_DESC = 'BBC articles'
-
- _TEST = {
- 'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
- 'info_dict': {
- 'id': '3jNQLTMrPlYGTBn0WV6M2MS',
- 'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
- 'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
- },
- 'playlist_count': 4,
- 'add_ie': ['BBCCoUk'],
- }
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage).strip()
-
- entries = [self.url_result(programme_url) for programme_url in re.findall(
- r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
-
- return self.playlist_result(entries, playlist_id, title, description)
-
-
-class BBCCoUkPlaylistBaseIE(InfoExtractor):
- def _entries(self, webpage, url, playlist_id):
- single_page = 'page' in compat_urlparse.parse_qs(
- compat_urlparse.urlparse(url).query)
- for page_num in itertools.count(2):
- for video_id in re.findall(
- self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
- yield self.url_result(
- self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
- if single_page:
- return
- next_page = self._search_regex(
- r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
- webpage, 'next page url', default=None, group='url')
- if not next_page:
- break
- webpage = self._download_webpage(
- compat_urlparse.urljoin(url, next_page), playlist_id,
- 'Downloading page %d' % page_num, page_num)
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- title, description = self._extract_title_and_description(webpage)
-
- return self.playlist_result(
- self._entries(webpage, url, playlist_id),
- playlist_id, title, description)
-
-
-class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
- IE_NAME = 'bbc.co.uk:iplayer:playlist'
- _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
- _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
- _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
- _TESTS = [{
- 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
- 'info_dict': {
- 'id': 'b05rcz9v',
- 'title': 'The Disappearance',
- 'description': 'French thriller serial about a missing teenager.',
- },
- 'playlist_mincount': 6,
- 'skip': 'This programme is not currently available on BBC iPlayer',
- }, {
- # Available for over a year, unlike the 30 days typical of most other programmes
- 'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
- 'info_dict': {
- 'id': 'p02tcc32',
- 'title': 'Bohemian Icons',
- 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
- },
- 'playlist_mincount': 10,
- }]
-
- def _extract_title_and_description(self, webpage):
- title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
- description = self._search_regex(
- r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
- webpage, 'description', fatal=False, group='value')
- return title, description
-
-
-class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
- IE_NAME = 'bbc.co.uk:playlist'
- _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
- _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
- _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
- _TESTS = [{
- 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
- 'info_dict': {
- 'id': 'b05rcz9v',
- 'title': 'The Disappearance - Clips - BBC Four',
- 'description': 'French thriller serial about a missing teenager.',
- },
- 'playlist_mincount': 7,
- }, {
- # multipage playlist, explicit page
- 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
- 'info_dict': {
- 'id': 'b00mfl7n',
- 'title': 'Frozen Planet - Clips - BBC One',
- 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
- },
- 'playlist_mincount': 24,
- }, {
- # multipage playlist, all pages
- 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
- 'info_dict': {
- 'id': 'b00mfl7n',
- 'title': 'Frozen Planet - Clips - BBC One',
- 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
- },
- 'playlist_mincount': 142,
- }, {
- 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
- 'only_matching': True,
- }, {
- 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
- 'only_matching': True,
- }, {
- 'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
- 'only_matching': True,
- }]
-
- def _extract_title_and_description(self, webpage):
- title = self._og_search_title(webpage, fatal=False)
- description = self._og_search_description(webpage)
- return title, description
diff --git a/youtube_dl/extractor/bellmedia.py b/youtube_dl/extractor/bellmedia.py
deleted file mode 100644
index f36a2452d..000000000
--- a/youtube_dl/extractor/bellmedia.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class BellMediaIE(InfoExtractor):
- _VALID_URL = r'''(?x)https?://(?:www\.)?
- (?P<domain>
- (?:
- ctv|
- tsn|
- bnn(?:bloomberg)?|
- thecomedynetwork|
- discovery|
- discoveryvelocity|
- sciencechannel|
- investigationdiscovery|
- animalplanet|
- bravo|
- mtv|
- space|
- etalk
- )\.ca|
- much\.com
- )/.*?(?:\bvid(?:eoid)?=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
- _TESTS = [{
- 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
- 'md5': '36d3ef559cfe8af8efe15922cd3ce950',
- 'info_dict': {
- 'id': '1403070',
- 'ext': 'flv',
- 'title': 'David Cockfield\'s Top Picks',
- 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
- 'upload_date': '20180525',
- 'timestamp': 1527288600,
- },
- }, {
- 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
- 'only_matching': True,
- }, {
- 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
- 'only_matching': True,
- }, {
- 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
- 'only_matching': True,
- }, {
- 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
- 'only_matching': True,
- }, {
- 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
- 'only_matching': True,
- }, {
- 'url': 'http://www.etalk.ca/video?videoid=663455',
- 'only_matching': True,
- }]
- _DOMAINS = {
- 'thecomedynetwork': 'comedy',
- 'discoveryvelocity': 'discvel',
- 'sciencechannel': 'discsci',
- 'investigationdiscovery': 'invdisc',
- 'animalplanet': 'aniplan',
- 'etalk': 'ctv',
- 'bnnbloomberg': 'bnn',
- }
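- # Maps site domains to 9c9media brand slugs; domains missing from the
- # table pass through unchanged, e.g. (illustrative) a tsn.ca URL yields
- # '9c9media:tsn_web:<video_id>'.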
-
- def _real_extract(self, url):
- domain, video_id = re.match(self._VALID_URL, url).groups()
- domain = domain.split('.')[0]
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
- 'ie_key': 'NineCNineMedia',
- }
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
deleted file mode 100644
index 80bd696e2..000000000
--- a/youtube_dl/extractor/bilibili.py
+++ /dev/null
@@ -1,421 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import hashlib
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urlparse,
-)
-from ..utils import (
- ExtractorError,
- int_or_none,
- float_or_none,
- parse_iso8601,
- smuggle_url,
- str_or_none,
- strip_jsonp,
- unified_timestamp,
- unsmuggle_url,
- urlencode_postdata,
-)
-
-
-class BiliBiliIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://www.bilibili.tv/video/av1074402/',
- 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
- 'info_dict': {
- 'id': '1074402',
- 'ext': 'flv',
- 'title': '【金坷垃】金泡沫',
- 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
- 'duration': 308.067,
- 'timestamp': 1398012678,
- 'upload_date': '20140420',
- 'thumbnail': r're:^https?://.+\.jpg',
- 'uploader': '菊子桑',
- 'uploader_id': '156160',
- },
- }, {
- # Tested in BiliBiliBangumiIE
- 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
- 'only_matching': True,
- }, {
- 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
- 'md5': '3f721ad1e75030cc06faf73587cfec57',
- 'info_dict': {
- 'id': '100643',
- 'ext': 'mp4',
- 'title': 'CHAOS;CHILD',
- 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
- },
- 'skip': 'Geo-restricted to China',
- }, {
- # Title with double quotes
- 'url': 'http://www.bilibili.com/video/av8903802/',
- 'info_dict': {
- 'id': '8903802',
- 'title': '阿滴英文|英文歌分享#6 "Closer',
- 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '8903802_part1',
- 'ext': 'flv',
- 'title': '阿滴英文|英文歌分享#6 "Closer',
- 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
- 'uploader': '阿滴英文',
- 'uploader_id': '65880958',
- 'timestamp': 1488382634,
- 'upload_date': '20170301',
- },
- 'params': {
- 'skip_download': True, # Test metadata only
- },
- }, {
- 'info_dict': {
- 'id': '8903802_part2',
- 'ext': 'flv',
- 'title': '阿滴英文|英文歌分享#6 "Closer',
- 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
- 'uploader': '阿滴英文',
- 'uploader_id': '65880958',
- 'timestamp': 1488382634,
- 'upload_date': '20170301',
- },
- 'params': {
- 'skip_download': True, # Test metadata only
- },
- }]
- }]
-
- _APP_KEY = 'iVGUTjsxvpLeuDCf'
- _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
-
- def _report_error(self, result):
- if 'message' in result:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
- elif 'code' in result:
- raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
- else:
- raise ExtractorError('Can\'t extract Bangumi episode ID')
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
-
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- anime_id = mobj.group('anime_id')
- webpage = self._download_webpage(url, video_id)
-
- if 'anime/' not in url:
- cid = self._search_regex(
- r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
- default=None
- ) or compat_parse_qs(self._search_regex(
- [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
- r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
- r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
- webpage, 'player parameters'))['cid'][0]
- else:
- if 'no_bangumi_tip' not in smuggled_data:
- self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
- video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
- headers = {
- 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
- 'Referer': url
- }
- headers.update(self.geo_verification_headers())
-
- js = self._download_json(
- 'http://bangumi.bilibili.com/web_api/get_source', video_id,
- data=urlencode_postdata({'episode_id': video_id}),
- headers=headers)
- if 'result' not in js:
- self._report_error(js)
- cid = js['result']['cid']
-
- headers = {
- 'Referer': url
- }
- headers.update(self.geo_verification_headers())
-
- entries = []
-
- RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
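- # Try the high-quality rendition first and fall back to plain MP4;
- # only the last attempt is allowed to be fatal.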
- for num, rendition in enumerate(RENDITIONS, start=1):
- payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
- sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
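- # Worked example (hypothetical cid 1000): sign is the hex MD5 of the
- # query string concatenated with the app secret, i.e.
- # md5('appkey=iVGUTjsxvpLeuDCf&cid=1000&otype=json&qn=80&quality=80&type='
- # + _BILIBILI_KEY)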
-
- video_info = self._download_json(
- 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
- video_id, note='Downloading video info page',
- headers=headers, fatal=num == len(RENDITIONS))
-
- if not video_info:
- continue
-
- if 'durl' not in video_info:
- if num < len(RENDITIONS):
- continue
- self._report_error(video_info)
-
- for idx, durl in enumerate(video_info['durl']):
- formats = [{
- 'url': durl['url'],
- 'filesize': int_or_none(durl['size']),
- }]
- for backup_url in durl.get('backup_url', []):
- formats.append({
- 'url': backup_url,
- # backup URLs have lower priorities
- 'preference': -2 if 'hd.mp4' in backup_url else -3,
- })
-
- for a_format in formats:
- a_format.setdefault('http_headers', {}).update({
- 'Referer': url,
- })
-
- self._sort_formats(formats)
-
- entries.append({
- 'id': '%s_part%s' % (video_id, idx),
- 'duration': float_or_none(durl.get('length'), 1000),
- 'formats': formats,
- })
- break
-
- title = self._html_search_regex(
- ('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
- '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
- group='title')
- description = self._html_search_meta('description', webpage)
- timestamp = unified_timestamp(self._html_search_regex(
- r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
- default=None) or self._html_search_meta(
- 'uploadDate', webpage, 'timestamp', default=None))
- thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
-
- # TODO: 'view_count' requires deobfuscating JavaScript
- info = {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'thumbnail': thumbnail,
- 'duration': float_or_none(video_info.get('timelength'), scale=1000),
- }
-
- uploader_mobj = re.search(
- r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
- webpage)
- if uploader_mobj:
- info.update({
- 'uploader': uploader_mobj.group('name'),
- 'uploader_id': uploader_mobj.group('id'),
- })
- if not info.get('uploader'):
- info['uploader'] = self._html_search_meta(
- 'author', webpage, 'uploader', default=None)
-
- for entry in entries:
- entry.update(info)
-
- if len(entries) == 1:
- return entries[0]
- else:
- for idx, entry in enumerate(entries):
- entry['id'] = '%s_part%d' % (video_id, (idx + 1))
-
- return {
- '_type': 'multi_video',
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'entries': entries,
- }
-
-
-class BiliBiliBangumiIE(InfoExtractor):
- _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
-
- IE_NAME = 'bangumi.bilibili.com'
- IE_DESC = 'BiliBili番剧'
-
- _TESTS = [{
- 'url': 'http://bangumi.bilibili.com/anime/1869',
- 'info_dict': {
- 'id': '1869',
- 'title': '混沌武士',
- 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
- },
- 'playlist_count': 26,
- }, {
- 'url': 'http://bangumi.bilibili.com/anime/1869',
- 'info_dict': {
- 'id': '1869',
- 'title': '混沌武士',
- 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
- },
- 'playlist': [{
- 'md5': '91da8621454dd58316851c27c68b0c13',
- 'info_dict': {
- 'id': '40062',
- 'ext': 'mp4',
- 'title': '混沌武士',
- 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
- 'timestamp': 1414538739,
- 'upload_date': '20141028',
- 'episode': '疾风怒涛 Tempestuous Temperaments',
- 'episode_number': 1,
- },
- }],
- 'params': {
- 'playlist_items': '1',
- },
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
-
- def _real_extract(self, url):
- bangumi_id = self._match_id(url)
-
- # Sometimes this API returns a JSONP response
- season_info = self._download_json(
- 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
- bangumi_id, transform_source=strip_jsonp)['result']
-
- entries = [{
- '_type': 'url_transparent',
- 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
- 'ie_key': BiliBiliIE.ie_key(),
- 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
- 'episode': episode.get('index_title'),
- 'episode_number': int_or_none(episode.get('index')),
- } for episode in season_info['episodes']]
-
- entries = sorted(entries, key=lambda entry: entry.get('episode_number'))
-
- return self.playlist_result(
- entries, bangumi_id,
- season_info.get('bangumi_title'), season_info.get('evaluate'))
-
-
-class BilibiliAudioBaseIE(InfoExtractor):
- def _call_api(self, path, sid, query=None):
- if not query:
- query = {'sid': sid}
- return self._download_json(
- 'https://www.bilibili.com/audio/music-service-c/web/' + path,
- sid, query=query)['data']
-
-
-class BilibiliAudioIE(BilibiliAudioBaseIE):
- _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
- _TEST = {
- 'url': 'https://www.bilibili.com/audio/au1003142',
- 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
- 'info_dict': {
- 'id': '1003142',
- 'ext': 'm4a',
- 'title': '【tsukimi】YELLOW / 神山羊',
- 'artist': 'tsukimi',
- 'comment_count': int,
- 'description': 'YELLOW的mp3版!',
- 'duration': 183,
- 'subtitles': {
- 'origin': [{
- 'ext': 'lrc',
- }],
- },
- 'thumbnail': r're:^https?://.+\.jpg',
- 'timestamp': 1564836614,
- 'upload_date': '20190803',
- 'uploader': 'tsukimi-つきみぐー',
- 'view_count': int,
- },
- }
-
- def _real_extract(self, url):
- au_id = self._match_id(url)
-
- play_data = self._call_api('url', au_id)
- formats = [{
- 'url': play_data['cdns'][0],
- 'filesize': int_or_none(play_data.get('size')),
- }]
-
- song = self._call_api('song/info', au_id)
- title = song['title']
- statistic = song.get('statistic') or {}
-
- subtitles = None
- lyric = song.get('lyric')
- if lyric:
- subtitles = {
- 'origin': [{
- 'url': lyric,
- }]
- }
-
- return {
- 'id': au_id,
- 'title': title,
- 'formats': formats,
- 'artist': song.get('author'),
- 'comment_count': int_or_none(statistic.get('comment')),
- 'description': song.get('intro'),
- 'duration': int_or_none(song.get('duration')),
- 'subtitles': subtitles,
- 'thumbnail': song.get('cover'),
- 'timestamp': int_or_none(song.get('passtime')),
- 'uploader': song.get('uname'),
- 'view_count': int_or_none(statistic.get('play')),
- }
-
-
-class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
- _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
- _TEST = {
- 'url': 'https://www.bilibili.com/audio/am10624',
- 'info_dict': {
- 'id': '10624',
- 'title': '每日新曲推荐(每日11:00更新)',
- 'description': '每天11:00更新,为你推送最新音乐',
- },
- 'playlist_count': 19,
- }
-
- def _real_extract(self, url):
- am_id = self._match_id(url)
-
- songs = self._call_api(
- 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
-
- entries = []
- for song in songs:
- sid = str_or_none(song.get('id'))
- if not sid:
- continue
- entries.append(self.url_result(
- 'https://www.bilibili.com/audio/au' + sid,
- BilibiliAudioIE.ie_key(), sid))
-
- if entries:
- album_data = self._call_api('menu/info', am_id) or {}
- album_title = album_data.get('title')
- if album_title:
- for entry in entries:
- entry['album'] = album_title
- return self.playlist_result(
- entries, am_id, album_title, album_data.get('intro'))
-
- return self.playlist_result(entries, am_id)
diff --git a/youtube_dl/extractor/biqle.py b/youtube_dl/extractor/biqle.py
deleted file mode 100644
index af21e3ee5..000000000
--- a/youtube_dl/extractor/biqle.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .vk import VKIE
-from ..utils import (
- HEADRequest,
- int_or_none,
-)
-
-
-class BIQLEIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
- _TESTS = [{
- # Youtube embed
- 'url': 'https://biqle.ru/watch/-115995369_456239081',
- 'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
- 'info_dict': {
- 'id': '8v4f-avW-VI',
- 'ext': 'mp4',
- 'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
- 'description': 'Passe-Partout',
- 'uploader_id': 'mrsimpsonstef3',
- 'uploader': 'Phanolito',
- 'upload_date': '20120822',
- },
- }, {
- 'url': 'http://biqle.org/watch/-44781847_168547604',
- 'md5': '7f24e72af1db0edf7c1aaba513174f97',
- 'info_dict': {
- 'id': '-44781847_168547604',
- 'ext': 'mp4',
- 'title': 'Ребенок в шоке от автоматической мойки',
- 'timestamp': 1396633454,
- 'uploader': 'Dmitry Kotov',
- 'upload_date': '20140404',
- 'uploader_id': '47850140',
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- embed_url = self._proto_relative_url(self._search_regex(
- r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
- webpage, 'embed url'))
- if VKIE.suitable(embed_url):
- return self.url_result(embed_url, VKIE.ie_key(), video_id)
-
- self._request_webpage(
- HEADRequest(embed_url), video_id, headers={'Referer': url})
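- # The 'video_ext' cookie set by the embed page packs four fields joined
- # by a percent-encoded colon ('%3A'):
- # <video_id>%3A<sig>%3A<unused>%3A<access_token>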
- video_id, sig, _, access_token = self._get_cookies(embed_url)['video_ext'].value.split('%3A')
- item = self._download_json(
- 'https://api.vk.com/method/video.get', video_id,
- headers={'User-Agent': 'okhttp/3.4.1'}, query={
- 'access_token': access_token,
- 'sig': sig,
- 'v': 5.44,
- 'videos': video_id,
- })['response']['items'][0]
- title = item['title']
-
- formats = []
- for f_id, f_url in item.get('files', {}).items():
- if f_id == 'external':
- return self.url_result(f_url)
- ext, height = f_id.split('_')
- formats.append({
- 'format_id': height + 'p',
- 'url': f_url,
- 'height': int_or_none(height),
- 'ext': ext,
- })
- self._sort_formats(formats)
-
- thumbnails = []
- for k, v in item.items():
- if k.startswith('photo_') and v:
- width = k.replace('photo_', '')
- thumbnails.append({
- 'id': width,
- 'url': v,
- 'width': int_or_none(width),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'comment_count': int_or_none(item.get('comments')),
- 'description': item.get('description'),
- 'duration': int_or_none(item.get('duration')),
- 'thumbnails': thumbnails,
- 'timestamp': int_or_none(item.get('date')),
- 'uploader': item.get('owner_id'),
- 'view_count': int_or_none(item.get('views')),
- }
diff --git a/youtube_dl/extractor/bitchute.py b/youtube_dl/extractor/bitchute.py
deleted file mode 100644
index 430663fbf..000000000
--- a/youtube_dl/extractor/bitchute.py
+++ /dev/null
@@ -1,135 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- orderedSet,
- urlencode_postdata,
-)
-
-
-class BitChuteIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
- 'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
- 'info_dict': {
- 'id': 'szoMrox2JEI',
- 'ext': 'mp4',
- 'title': 'Fuck bitches get money',
- 'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Victoria X Rave',
- },
- }, {
- 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
- })
-
- title = self._html_search_regex(
- (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
- webpage, 'title', default=None) or self._html_search_meta(
- 'description', webpage, 'title',
- default=None) or self._og_search_description(webpage)
-
- format_urls = []
- for mobj in re.finditer(
- r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
- format_urls.append(mobj.group('url'))
- format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
-
- formats = [
- {'url': format_url}
- for format_url in orderedSet(format_urls)]
-
- if not formats:
- formats = self._parse_html5_media_entries(
- url, webpage, video_id)[0]['formats']
-
- self._check_formats(formats, video_id)
- self._sort_formats(formats)
-
- description = self._html_search_regex(
- r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
- webpage, 'description', fatal=False)
- thumbnail = self._og_search_thumbnail(
- webpage, default=None) or self._html_search_meta(
- 'twitter:image:src', webpage, 'thumbnail')
- uploader = self._html_search_regex(
- (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
- r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
- webpage, 'uploader', fatal=False)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'formats': formats,
- }
-
-
-class BitChuteChannelIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'https://www.bitchute.com/channel/victoriaxrave/',
- 'playlist_mincount': 185,
- 'info_dict': {
- 'id': 'victoriaxrave',
- },
- }
-
- _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
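- # The site runs Django, whose CSRF check only requires that the
- # csrftoken cookie and the csrfmiddlewaretoken form field match,
- # so a fixed token appears to work for both (see _entries below).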
-
- def _entries(self, channel_id):
- channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
- offset = 0
- for page_num in itertools.count(1):
- data = self._download_json(
- '%sextend/' % channel_url, channel_id,
- 'Downloading channel page %d' % page_num,
- data=urlencode_postdata({
- 'csrfmiddlewaretoken': self._TOKEN,
- 'name': '',
- 'offset': offset,
- }), headers={
- 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
- 'Referer': channel_url,
- 'X-Requested-With': 'XMLHttpRequest',
- 'Cookie': 'csrftoken=%s' % self._TOKEN,
- })
- if data.get('success') is False:
- break
- html = data.get('html')
- if not html:
- break
- video_ids = re.findall(
- r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
- html)
- if not video_ids:
- break
- offset += len(video_ids)
- for video_id in video_ids:
- yield self.url_result(
- 'https://www.bitchute.com/video/%s' % video_id,
- ie=BitChuteIE.ie_key(), video_id=video_id)
-
- def _real_extract(self, url):
- channel_id = self._match_id(url)
- return self.playlist_result(
- self._entries(channel_id), playlist_id=channel_id)
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
deleted file mode 100644
index 8e2f7217a..000000000
--- a/youtube_dl/extractor/brightcove.py
+++ /dev/null
@@ -1,651 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import base64
-import re
-import struct
-
-from .common import InfoExtractor
-from .adobepass import AdobePassIE
-from ..compat import (
- compat_etree_fromstring,
- compat_parse_qs,
- compat_urllib_parse_urlparse,
- compat_urlparse,
- compat_xml_parse_error,
- compat_HTTPError,
-)
-from ..utils import (
- ExtractorError,
- extract_attributes,
- find_xpath_attr,
- fix_xml_ampersands,
- float_or_none,
- js_to_json,
- int_or_none,
- parse_iso8601,
- smuggle_url,
- unescapeHTML,
- unsmuggle_url,
- update_url_query,
- clean_html,
- mimetype2ext,
- UnsupportedError,
-)
-
-
-class BrightcoveLegacyIE(InfoExtractor):
- IE_NAME = 'brightcove:legacy'
- _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
-
- _TESTS = [
- {
- # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
- 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
- 'md5': '5423e113865d26e40624dce2e4b45d95',
- 'note': 'Test Brightcove downloads and detection in GenericIE',
- 'info_dict': {
- 'id': '2371591881001',
- 'ext': 'mp4',
- 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
- 'uploader': '8TV',
- 'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
- 'timestamp': 1368213670,
- 'upload_date': '20130510',
- 'uploader_id': '1589608506001',
- },
- 'skip': 'The player has been deactivated by the content owner',
- },
- {
- # From http://medianetwork.oracle.com/video/player/1785452137001
- 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
- 'info_dict': {
- 'id': '1785452137001',
- 'ext': 'flv',
- 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
- 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
- 'uploader': 'Oracle',
- 'timestamp': 1344975024,
- 'upload_date': '20120814',
- 'uploader_id': '1460825906',
- },
- 'skip': 'video not playable',
- },
- {
- # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
- 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
- 'info_dict': {
- 'id': '2750934548001',
- 'ext': 'mp4',
- 'title': 'This Bracelet Acts as a Personal Thermostat',
- 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
- # 'uploader': 'Mashable',
- 'timestamp': 1382041798,
- 'upload_date': '20131017',
- 'uploader_id': '1130468786001',
- },
- },
- {
- # test that the default referer works
- # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
- 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
- 'info_dict': {
- 'id': '2878862109001',
- 'ext': 'mp4',
- 'title': 'Lost in Motion II',
- 'description': 'md5:363109c02998fee92ec02211bd8000df',
- 'uploader': 'National Ballet of Canada',
- },
- 'skip': 'Video gone',
- },
- {
- # test flv videos served by akamaihd.net
- # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
- 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
- # The md5 checksum changes on each download
- 'info_dict': {
- 'id': '3750436379001',
- 'ext': 'flv',
- 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
- 'uploader': 'RBTV Old (do not use)',
- 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
- 'timestamp': 1409122195,
- 'upload_date': '20140827',
- 'uploader_id': '710858724001',
- },
- 'skip': 'Video gone',
- },
- {
- # playlist with 'videoList'
- # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
- 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
- 'info_dict': {
- 'title': 'Sealife',
- 'id': '3550319591001',
- },
- 'playlist_mincount': 7,
- 'skip': 'Unsupported URL',
- },
- {
- # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
- 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
- 'info_dict': {
- 'id': '1522758701001',
- 'title': 'Lesson 08',
- },
- 'playlist_mincount': 10,
- 'skip': 'Unsupported URL',
- },
- {
- # playerID inferred from bcpid
- # from http://www.un.org/chinese/News/story.asp?NewsID=27724
- 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
- 'only_matching': True, # Tested in GenericIE
- }
- ]
-
- @classmethod
- def _build_brightcove_url(cls, object_str):
- """
- Build a Brightcove URL from an XML string containing
- <object class="BrightcoveExperience">{params}</object>
- """
-
- # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
- object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
- lambda m: m.group(1) + '/>', object_str)
- # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
- object_str = object_str.replace('<--', '<!--')
- # remove namespace to simplify extraction
- object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
- object_str = fix_xml_ampersands(object_str)
-
- try:
- object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
- except compat_xml_parse_error:
- return
-
- fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
- if fv_el is not None:
- flashvars = dict(
- (k, v[0])
- for k, v in compat_parse_qs(fv_el.attrib['value']).items())
- else:
- flashvars = {}
-
- data_url = object_doc.attrib.get('data', '')
- data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)
-
- def find_param(name):
- if name in flashvars:
- return flashvars[name]
- node = find_xpath_attr(object_doc, './param', 'name', name)
- if node is not None:
- return node.attrib['value']
- return data_url_params.get(name)
-
- params = {}
-
- playerID = find_param('playerID') or find_param('playerId')
- if playerID is None:
- raise ExtractorError('Cannot find player ID')
- params['playerID'] = playerID
-
- playerKey = find_param('playerKey')
- # Not all pages define this value
- if playerKey is not None:
- params['playerKey'] = playerKey
- # These fields hold the id of the video
- videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
- if videoPlayer is not None:
- if isinstance(videoPlayer, list):
- videoPlayer = videoPlayer[0]
- videoPlayer = videoPlayer.strip()
- # UUID is also possible for videoPlayer (e.g.
- # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
- # or http://www8.hp.com/cn/zh/home.html)
- if not (re.match(
- r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
- videoPlayer) or videoPlayer.startswith('ref:')):
- return None
- params['@videoPlayer'] = videoPlayer
- linkBase = find_param('linkBaseURL')
- if linkBase is not None:
- params['linkBaseURL'] = linkBase
- return cls._make_brightcove_url(params)
-
- @classmethod
- def _build_brightcove_url_from_js(cls, object_js):
- # The layout of JS is as follows:
- # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
- # // build Brightcove <object /> XML
- # }
- m = re.search(
- r'''(?x)customBC\.createVideo\(
- .*? # skipping width and height
- ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
- ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
- # in length, however it's appended to itself
- # in places, so truncate
- ["\'](?P<videoID>\d+)["\'] # @videoPlayer
- ''', object_js)
- if m:
- return cls._make_brightcove_url(m.groupdict())
-
- @classmethod
- def _make_brightcove_url(cls, params):
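- # For illustration (assumed values, not from the original source):
- # {'playerID': '123', '@videoPlayer': '456'} becomes
- # http://c.brightcove.com/services/viewer/htmlFederated?playerID=123&%40videoPlayer=456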
- return update_url_query(
- 'http://c.brightcove.com/services/viewer/htmlFederated', params)
-
- @classmethod
- def _extract_brightcove_url(cls, webpage):
- """Try to extract the brightcove url from the webpage, returns None
- if it can't be found
- """
- urls = cls._extract_brightcove_urls(webpage)
- return urls[0] if urls else None
-
- @classmethod
- def _extract_brightcove_urls(cls, webpage):
- """Return a list of all Brightcove URLs from the webpage """
-
- url_m = re.search(
- r'''(?x)
- <meta\s+
- (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
- content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
- ''', webpage)
- if url_m:
- url = unescapeHTML(url_m.group('url'))
- # Some sites don't add it, we can't download with this url, for example:
- # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
- if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
- return [url]
-
- matches = re.findall(
- r'''(?sx)<object
- (?:
- [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
- [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
- ).+?>\s*</object>''',
- webpage)
- if matches:
- return list(filter(None, [cls._build_brightcove_url(m) for m in matches]))
-
- matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
- if matches:
- return list(filter(None, [
- cls._build_brightcove_url_from_js(custom_bc)
- for custom_bc in matches]))
- return [src for _, src in re.findall(
- r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
-
- # Change the 'videoId' and others field to '@videoPlayer'
- url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
- # Change bckey (used by bcove.me urls) to playerKey
- url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
- mobj = re.match(self._VALID_URL, url)
- query_str = mobj.group('query')
- query = compat_urlparse.parse_qs(query_str)
-
- videoPlayer = query.get('@videoPlayer')
- if videoPlayer:
- # We set the original url as the default 'Referer' header
- referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
- video_id = videoPlayer[0]
- if 'playerID' not in query:
- mobj = re.search(r'/bcpid(\d+)', url)
- if mobj is not None:
- query['playerID'] = [mobj.group(1)]
- publisher_id = query.get('publisherId')
- if publisher_id and publisher_id[0].isdigit():
- publisher_id = publisher_id[0]
- if not publisher_id:
- player_key = query.get('playerKey')
- if player_key and ',' in player_key[0]:
- player_key = player_key[0]
- else:
- player_id = query.get('playerID')
- if player_id and player_id[0].isdigit():
- headers = {}
- if referer:
- headers['Referer'] = referer
- player_page = self._download_webpage(
- 'http://link.brightcove.com/services/player/bcpid' + player_id[0],
- video_id, headers=headers, fatal=False)
- if player_page:
- player_key = self._search_regex(
- r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
- player_page, 'player key', fatal=False)
- if player_key:
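- # The second comma-separated token of the player key encodes the
- # publisher id as a base64url big-endian 64-bit integer, with '~'
- # standing in for the '=' padding.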
- enc_pub_id = player_key.split(',')[1].replace('~', '=')
- publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
- if publisher_id:
- brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
- if referer:
- brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
- return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
- # TODO: figure out if it's possible to extract playlistId from playerKey
- # elif 'playerKey' in query:
- # player_key = query['playerKey']
- # return self._get_playlist_info(player_key[0])
- raise UnsupportedError(url)
-
-
-class BrightcoveNewIE(AdobePassIE):
- IE_NAME = 'brightcove:new'
- _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
- _TESTS = [{
- 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
- 'md5': 'c8100925723840d4b0d243f7025703be',
- 'info_dict': {
- 'id': '4463358922001',
- 'ext': 'mp4',
- 'title': 'Meet the man behind Popcorn Time',
- 'description': 'md5:eac376a4fe366edc70279bfb681aea16',
- 'duration': 165.768,
- 'timestamp': 1441391203,
- 'upload_date': '20150904',
- 'uploader_id': '929656772001',
- 'formats': 'mincount:20',
- },
- }, {
- # with rtmp streams
- 'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
- 'info_dict': {
- 'id': '4279049078001',
- 'ext': 'mp4',
- 'title': 'Titansgrave: Chapter 0',
- 'description': 'Titansgrave: Chapter 0',
- 'duration': 1242.058,
- 'timestamp': 1433556729,
- 'upload_date': '20150606',
- 'uploader_id': '4036320279001',
- 'formats': 'mincount:39',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }, {
- # playlist stream
- 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
- 'info_dict': {
- 'id': '5718313430001',
- 'title': 'No Audio Playlist',
- },
- 'playlist_count': 7,
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }, {
- 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
- 'only_matching': True,
- }, {
- # ref: prefixed video id
- 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
- 'only_matching': True,
- }, {
- # non numeric ref: prefixed video id
- 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
- 'only_matching': True,
- }, {
- # unavailable video without message but with error_code
- 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_url(ie, webpage):
- urls = BrightcoveNewIE._extract_urls(ie, webpage)
- return urls[0] if urls else None
-
- @staticmethod
- def _extract_urls(ie, webpage):
- # Reference:
- # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
- # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
- # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
- # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
- # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
-
- entries = []
-
- # Look for iframe embeds [1]
- for _, url in re.findall(
- r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
- entries.append(url if url.startswith('http') else 'http:' + url)
-
- # Look for <video> tags [2] and embed_in_page embeds [3]
- # [2] looks like a <video> tag carrying a data-video-id attribute,
- # optionally followed by the player <script> tag:
- for video, script_tag, account_id, player_id, embed in re.findall(
- r'''(?isx)
- (<video\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
- (?:.*?
- (<script[^>]+
- src=["\'](?:https?:)?//players\.brightcove\.net/
- (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
- )
- )?
- ''', webpage):
- attrs = extract_attributes(video)
-
- # According to examples from [4] it's unclear whether video id
- # may be optional and what to do when it is
- video_id = attrs.get('data-video-id')
- if not video_id:
- continue
-
- account_id = account_id or attrs.get('data-account')
- if not account_id:
- continue
-
- player_id = player_id or attrs.get('data-player') or 'default'
- embed = embed or attrs.get('data-embed') or 'default'
-
- bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
- account_id, player_id, embed, video_id)
-
- # Some Brightcove videos may be embedded with a video tag only,
- # without a script tag or any mention of Brightcove at all. Such
- # embeds are ambiguous since they are matched based only on the
- # data-video-id and data-account attributes and in the wild may
- # not be Brightcove embeds at all. For these, validate the
- # reconstructed Brightcove URL and only process it if valid,
- # ensuring there is indeed a Brightcove embed.
- if not script_tag and not ie._is_valid_url(
- bc_url, video_id, 'possible brightcove video'):
- continue
-
- entries.append(bc_url)
-
- return entries
-
- def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
- title = json_data['name'].strip()
-
- formats = []
- for source in json_data.get('sources', []):
- container = source.get('container')
- ext = mimetype2ext(source.get('type'))
- src = source.get('src')
- # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
- if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
- continue
- elif ext == 'm3u8' or container == 'M2TS':
- if not src:
- continue
- formats.extend(self._extract_m3u8_formats(
- src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- elif ext == 'mpd':
- if not src:
- continue
- formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
- else:
- streaming_src = source.get('streaming_src')
- stream_name, app_name = source.get('stream_name'), source.get('app_name')
- if not src and not streaming_src and (not stream_name or not app_name):
- continue
- tbr = float_or_none(source.get('avg_bitrate'), 1000)
- height = int_or_none(source.get('height'))
- width = int_or_none(source.get('width'))
- f = {
- 'tbr': tbr,
- 'filesize': int_or_none(source.get('size')),
- 'container': container,
- 'ext': ext or container.lower(),
- }
- if width == 0 and height == 0:
- f.update({
- 'vcodec': 'none',
- })
- else:
- f.update({
- 'width': width,
- 'height': height,
- 'vcodec': source.get('codec'),
- })
-
- def build_format_id(kind):
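- # e.g. build_format_id('http') with tbr 1264.0 and height 720
- # yields 'http-1264k-720p' (illustrative values)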
- format_id = kind
- if tbr:
- format_id += '-%dk' % int(tbr)
- if height:
- format_id += '-%dp' % height
- return format_id
-
- if src or streaming_src:
- f.update({
- 'url': src or streaming_src,
- 'format_id': build_format_id('http' if src else 'http-streaming'),
- 'source_preference': 0 if src else -1,
- })
- else:
- f.update({
- 'url': app_name,
- 'play_path': stream_name,
- 'format_id': build_format_id('rtmp'),
- })
- formats.append(f)
- if not formats:
- # for sonyliv.com DRM protected videos
- s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
- if s3_source_url:
- formats.append({
- 'url': s3_source_url,
- 'format_id': 'source',
- })
-
- errors = json_data.get('errors')
- if not formats and errors:
- error = errors[0]
- raise ExtractorError(
- error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
-
- self._sort_formats(formats)
-
- for f in formats:
- f.setdefault('http_headers', {}).update(headers)
-
- subtitles = {}
- for text_track in json_data.get('text_tracks', []):
- if text_track.get('src'):
- subtitles.setdefault(text_track.get('srclang'), []).append({
- 'url': text_track['src'],
- })
-
- is_live = False
- duration = float_or_none(json_data.get('duration'), 1000)
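- # The Playback API reports a non-positive duration for live streams.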
- if duration is not None and duration <= 0:
- is_live = True
-
- return {
- 'id': video_id,
- 'title': self._live_title(title) if is_live else title,
- 'description': clean_html(json_data.get('description')),
- 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
- 'duration': duration,
- 'timestamp': parse_iso8601(json_data.get('published_at')),
- 'uploader_id': json_data.get('account_id'),
- 'formats': formats,
- 'subtitles': subtitles,
- 'tags': json_data.get('tags', []),
- 'is_live': is_live,
- }
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- self._initialize_geo_bypass({
- 'countries': smuggled_data.get('geo_countries'),
- 'ip_blocks': smuggled_data.get('geo_ip_blocks'),
- })
-
- account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
-
- webpage = self._download_webpage(
- 'http://players.brightcove.net/%s/%s_%s/index.min.js'
- % (account_id, player_id, embed), video_id)
-
- policy_key = None
-
- catalog = self._search_regex(
- r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
- if catalog:
- catalog = self._parse_json(
- js_to_json(catalog), video_id, fatal=False)
- if catalog:
- policy_key = catalog.get('policyKey')
-
- if not policy_key:
- policy_key = self._search_regex(
- r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
- webpage, 'policy key', group='pk')
-
- api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
- headers = {
- 'Accept': 'application/json;pk=%s' % policy_key,
- }
- referrer = smuggled_data.get('referrer')
- if referrer:
- headers.update({
- 'Referer': referrer,
- 'Origin': re.search(r'https?://[^/]+', referrer).group(0),
- })
- try:
- json_data = self._download_json(api_url, video_id, headers=headers)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
- message = json_data.get('message') or json_data['error_code']
- if json_data.get('error_subcode') == 'CLIENT_GEO':
- self.raise_geo_restricted(msg=message)
- raise ExtractorError(message, expected=True)
- raise
-
- errors = json_data.get('errors')
- if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
- custom_fields = json_data['custom_fields']
- tve_token = self._extract_mvpd_auth(
- smuggled_data['source_url'], video_id,
- custom_fields['bcadobepassrequestorid'],
- custom_fields['bcadobepassresourceid'])
- json_data = self._download_json(
- api_url, video_id, headers={
- 'Accept': 'application/json;pk=%s' % policy_key
- }, query={
- 'tveToken': tve_token,
- })
-
- if content_type == 'playlist':
- return self.playlist_result(
- [self._parse_brightcove_metadata(vid, vid.get('id'), headers)
- for vid in json_data.get('videos', []) if vid.get('id')],
- json_data.get('id'), json_data.get('name'),
- json_data.get('description'))
-
- return self._parse_brightcove_metadata(
- json_data, video_id, headers=headers)
diff --git a/youtube_dl/extractor/businessinsider.py b/youtube_dl/extractor/businessinsider.py
deleted file mode 100644
index dfcf9bc6b..000000000
--- a/youtube_dl/extractor/businessinsider.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .jwplatform import JWPlatformIE
-
-
-class BusinessInsiderIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
- 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
- 'info_dict': {
- 'id': 'hZRllCfw',
- 'ext': 'mp4',
- 'title': "Here's how much radiation you're exposed to in everyday life",
- 'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
- 'upload_date': '20170709',
- 'timestamp': 1499606400,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
- 'only_matching': True,
- }, {
- 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- jwplatform_id = self._search_regex(
- (r'data-media-id=["\']([a-zA-Z0-9]{8})',
- r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
- r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'),
- webpage, 'jwplatform id')
- return self.url_result(
- 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
- video_id=video_id)
diff --git a/youtube_dl/extractor/canvas.py b/youtube_dl/extractor/canvas.py
deleted file mode 100644
index c506bc5dd..000000000
--- a/youtube_dl/extractor/canvas.py
+++ /dev/null
@@ -1,319 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import json
-
-from .common import InfoExtractor
-from .gigya import GigyaBaseIE
-from ..compat import compat_HTTPError
-from ..utils import (
- ExtractorError,
- strip_or_none,
- float_or_none,
- int_or_none,
- merge_dicts,
- parse_iso8601,
-)
-
-
-class CanvasIE(InfoExtractor):
- _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
- 'md5': '90139b746a0a9bd7bb631283f6e2a64e',
- 'info_dict': {
- 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
- 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
- 'ext': 'flv',
- 'title': 'Nachtwacht: De Greystook',
- 'description': 'md5:1db3f5dc4c7109c821261e7512975be7',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1468.03,
- },
- 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
- }, {
- 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
- 'only_matching': True,
- }]
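- # Plain HLS can use the native downloader; AES-encrypted HLS is
- # routed to the ffmpeg-based 'm3u8' protocol.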
- _HLS_ENTRY_PROTOCOLS_MAP = {
- 'HLS': 'm3u8_native',
- 'HLS_AES': 'm3u8',
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- site_id, video_id = mobj.group('site_id'), mobj.group('id')
-
- data = self._download_json(
- 'https://mediazone.vrt.be/api/v1/%s/assets/%s'
- % (site_id, video_id), video_id)
-
- title = data['title']
- description = data.get('description')
-
- formats = []
- for target in data['targetUrls']:
- format_url, format_type = target.get('url'), target.get('type')
- if not format_url or not format_type:
- continue
- if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
- m3u8_id=format_type, fatal=False))
- elif format_type == 'HDS':
- formats.extend(self._extract_f4m_formats(
- format_url, video_id, f4m_id=format_type, fatal=False))
- elif format_type == 'MPEG_DASH':
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, mpd_id=format_type, fatal=False))
- elif format_type == 'HSS':
- formats.extend(self._extract_ism_formats(
- format_url, video_id, ism_id='mss', fatal=False))
- else:
- formats.append({
- 'format_id': format_type,
- 'url': format_url,
- })
- self._sort_formats(formats)
-
- subtitles = {}
- subtitle_urls = data.get('subtitleUrls')
- if isinstance(subtitle_urls, list):
- for subtitle in subtitle_urls:
- subtitle_url = subtitle.get('url')
- if subtitle_url and subtitle.get('type') == 'CLOSED':
- subtitles.setdefault('nl', []).append({'url': subtitle_url})
-
- return {
- 'id': video_id,
- 'display_id': video_id,
- 'title': title,
- 'description': description,
- 'formats': formats,
- 'duration': float_or_none(data.get('duration'), 1000),
- 'thumbnail': data.get('posterImageUrl'),
- 'subtitles': subtitles,
- }
-
-
-class CanvasEenIE(InfoExtractor):
- IE_DESC = 'canvas.be and een.be'
- _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
- 'md5': 'ed66976748d12350b118455979cca293',
- 'info_dict': {
- 'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
- 'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
- 'ext': 'flv',
- 'title': 'De afspraak veilt voor de Warmste Week',
- 'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 49.02,
- },
- 'expected_warnings': ['is not a supported codec'],
- }, {
- # with subtitles
- 'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
- 'info_dict': {
- 'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
- 'display_id': 'pieter-0167',
- 'ext': 'mp4',
- 'title': 'Pieter 0167',
- 'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 2553.08,
- 'subtitles': {
- 'nl': [{
- 'ext': 'vtt',
- }],
- },
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Pagina niet gevonden',
- }, {
- 'url': 'https://www.een.be/sorry-voor-alles/herbekijk-sorry-voor-alles',
- 'info_dict': {
- 'id': 'mz-ast-11a587f8-b921-4266-82e2-0bce3e80d07f',
- 'display_id': 'herbekijk-sorry-voor-alles',
- 'ext': 'mp4',
- 'title': 'Herbekijk Sorry voor alles',
- 'description': 'md5:8bb2805df8164e5eb95d6a7a29dc0dd3',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 3788.06,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Episode no longer available',
- }, {
- 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- site_id, display_id = mobj.group('site_id'), mobj.group('id')
-
- webpage = self._download_webpage(url, display_id)
-
- title = strip_or_none(self._search_regex(
- r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
- webpage, 'title', default=None) or self._og_search_title(
- webpage, default=None))
-
- video_id = self._html_search_regex(
- r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
- group='id')
-
- return {
- '_type': 'url_transparent',
- 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
- 'ie_key': CanvasIE.ie_key(),
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': self._og_search_description(webpage),
- }
-
-
-class VrtNUIE(GigyaBaseIE):
- IE_DESC = 'VrtNU.be'
- _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
- 'info_dict': {
- 'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
- 'ext': 'flv',
- 'title': 'De zwarte weduwe',
- 'description': 'md5:d90c21dced7db869a85db89a623998d4',
- 'duration': 1457.04,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'season': '1',
- 'season_number': 1,
- 'episode_number': 1,
- },
- 'skip': 'This video is only available for registered users'
- }]
- _NETRC_MACHINE = 'vrtnu'
- _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
- _CONTEXT_ID = 'R3595707040'
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- auth_data = {
- 'APIKey': self._APIKEY,
- 'targetEnv': 'jssdk',
- 'loginID': username,
- 'password': password,
- 'authMode': 'cookie',
- }
-
- auth_info = self._gigya_login(auth_data)
-
- # Sometimes authentication fails for no good reason, retry
- login_attempt = 1
- while login_attempt <= 3:
- try:
- # When requesting a token, no actual token is returned, but the
- # necessary cookies are set.
- self._request_webpage(
- 'https://token.vrt.be',
- None, note='Requesting a token', errnote='Could not get a token',
- headers={
- 'Content-Type': 'application/json',
- 'Referer': 'https://www.vrt.be/vrtnu/',
- },
- data=json.dumps({
- 'uid': auth_info['UID'],
- 'uidsig': auth_info['UIDSignature'],
- 'ts': auth_info['signatureTimestamp'],
- 'email': auth_info['profile']['email'],
- }).encode('utf-8'))
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
- login_attempt += 1
- self.report_warning('Authentication failed')
- self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
- else:
- raise e
- else:
- break
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage, urlh = self._download_webpage_handle(url, display_id)
-
- info = self._search_json_ld(webpage, display_id, default={})
-
- # title is optional here since it may be extracted by the extractor
- # that this URL is delegated to
- title = strip_or_none(self._html_search_regex(
- r'(?ms)<h1 class="content__heading">(.+?)</h1>',
- webpage, 'title', default=None))
-
- description = self._html_search_regex(
- r'(?ms)<div class="content__description">(.+?)</div>',
- webpage, 'description', default=None)
-
- season = self._html_search_regex(
- [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
- <span>seizoen\ (.+?)</span>\s*
- </div>''',
- r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
- webpage, 'season', default=None)
-
- season_number = int_or_none(season)
-
- episode_number = int_or_none(self._html_search_regex(
- r'''(?xms)<div\ class="content__episode">\s*
- <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
- </div>''',
- webpage, 'episode_number', default=None))
-
- release_date = parse_iso8601(self._html_search_regex(
- r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
- webpage, 'release_date', default=None))
-
- # If there's a ? or a # in the URL, remove them and everything after
- clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
- securevideo_url = clean_url + '.mssecurevideo.json'
-
- try:
- video = self._download_json(securevideo_url, display_id)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
- self.raise_login_required()
- raise
-
- # We are dealing with a '../<show>.relevant' URL
- redirect_url = video.get('url')
- if redirect_url:
- return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
-
- # There is only one entry, but with an unknown key, so just get
- # the first one
- video_id = list(video.values())[0].get('videoid')
-
- return merge_dicts(info, {
- '_type': 'url_transparent',
- 'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
- 'ie_key': CanvasIE.ie_key(),
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'season': season,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'release_date': release_date,
- })
diff --git a/youtube_dl/extractor/cbc.py b/youtube_dl/extractor/cbc.py
deleted file mode 100644
index 751a3a8f2..000000000
--- a/youtube_dl/extractor/cbc.py
+++ /dev/null
@@ -1,457 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_HTTPError,
-)
-from ..utils import (
- js_to_json,
- smuggle_url,
- try_get,
- xpath_text,
- xpath_element,
- xpath_with_ns,
- find_xpath_attr,
- orderedSet,
- parse_duration,
- parse_iso8601,
- parse_age_limit,
- strip_or_none,
- int_or_none,
- ExtractorError,
-)
-
-
-class CBCIE(InfoExtractor):
- IE_NAME = 'cbc.ca'
- _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
- _TESTS = [{
- # with mediaId
- 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
- 'md5': '97e24d09672fc4cf56256d6faa6c25bc',
- 'info_dict': {
- 'id': '2682904050',
- 'ext': 'mp4',
- 'title': 'Don Cherry – All-Stars',
- 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
- 'timestamp': 1454463000,
- 'upload_date': '20160203',
- 'uploader': 'CBCC-NEW',
- },
- 'skip': 'Geo-restricted to Canada',
- }, {
- # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
- 'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
- 'md5': '162adfa070274b144f4fdc3c3b8207db',
- 'info_dict': {
- 'id': '2414435309',
- 'ext': 'mp4',
- 'title': '22 Minutes Update: What Not To Wear Quebec',
- 'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
- 'upload_date': '20131025',
- 'uploader': 'CBCC-NEW',
- 'timestamp': 1382717907,
- },
- }, {
- # with clipId, feed only available via tpfeed.cbc.ca
- 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
- 'md5': '0274a90b51a9b4971fe005c63f592f12',
- 'info_dict': {
- 'id': '2487345465',
- 'ext': 'mp4',
- 'title': 'Robin Williams freestyles on 90 Minutes Live',
- 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
- 'upload_date': '19780210',
- 'uploader': 'CBCC-NEW',
- 'timestamp': 255977160,
- },
- }, {
- # multiple iframes
- 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
- 'playlist': [{
- 'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
- 'info_dict': {
- 'id': '2680832926',
- 'ext': 'mp4',
- 'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
- 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
- 'upload_date': '20160201',
- 'timestamp': 1454342820,
- 'uploader': 'CBCC-NEW',
- },
- }, {
- 'md5': '415a0e3f586113894174dfb31aa5bb1a',
- 'info_dict': {
- 'id': '2658915080',
- 'ext': 'mp4',
- 'title': 'Fly like an eagle!',
- 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
- 'upload_date': '20150315',
- 'timestamp': 1426443984,
- 'uploader': 'CBCC-NEW',
- },
- }],
- 'skip': 'Geo-restricted to Canada',
- }, {
- # multiple CBC.APP.Caffeine.initInstance(...)
- 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
- 'info_dict': {
- 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
- 'id': 'dog-indoor-exercise-winter-1.3928238',
- 'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
- },
- 'playlist_mincount': 6,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
-
- def _extract_player_init(self, player_init, display_id):
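- # Prefer an explicit mediaId; otherwise resolve the clipId via
- # tpfeed.cbc.ca and, failing that, via feed.theplatform.com.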
- player_info = self._parse_json(player_init, display_id, js_to_json)
- media_id = player_info.get('mediaId')
- if not media_id:
- clip_id = player_info['clipId']
- feed = self._download_json(
- 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
- clip_id, fatal=False)
- if feed:
- media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
- if not media_id:
- media_id = self._download_json(
- 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
- clip_id)['entries'][0]['id'].split('/')[-1]
- return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- title = self._og_search_title(webpage, default=None) or self._html_search_meta(
- 'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
- r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
- entries = [
- self._extract_player_init(player_init, display_id)
- for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
- media_ids = []
- for media_id_re in (
- r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
- r'<div[^>]+\bid=["\']player-(\d+)',
- r'guid["\']\s*:\s*["\'](\d+)'):
- media_ids.extend(re.findall(media_id_re, webpage))
- entries.extend([
- self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
- for media_id in orderedSet(media_ids)])
- return self.playlist_result(
- entries, display_id, strip_or_none(title),
- self._og_search_description(webpage))
-
-
-class CBCPlayerIE(InfoExtractor):
- IE_NAME = 'cbc.ca:player'
- _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://www.cbc.ca/player/play/2683190193',
- 'md5': '64d25f841ddf4ddb28a235338af32e2c',
- 'info_dict': {
- 'id': '2683190193',
- 'ext': 'mp4',
- 'title': 'Gerry Runs a Sweat Shop',
- 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
- 'timestamp': 1455071400,
- 'upload_date': '20160210',
- 'uploader': 'CBCC-NEW',
- },
- 'skip': 'Geo-restricted to Canada',
- }, {
- # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
- 'url': 'http://www.cbc.ca/player/play/2657631896',
- 'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
- 'info_dict': {
- 'id': '2657631896',
- 'ext': 'mp3',
- 'title': 'CBC Montreal is organizing its first ever community hackathon!',
- 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
- 'timestamp': 1425704400,
- 'upload_date': '20150307',
- 'uploader': 'CBCC-NEW',
- },
- }, {
- 'url': 'http://www.cbc.ca/player/play/2164402062',
- 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
- 'info_dict': {
- 'id': '2164402062',
- 'ext': 'mp4',
- 'title': 'Cancer survivor four times over',
- 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
- 'timestamp': 1320410746,
- 'upload_date': '20111104',
- 'uploader': 'CBCC-NEW',
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- return {
- '_type': 'url_transparent',
- 'ie_key': 'ThePlatform',
- 'url': smuggle_url(
- 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
- 'force_smil_url': True
- }),
- 'id': video_id,
- }
-
-
-class CBCWatchBaseIE(InfoExtractor):
- _device_id = None
- _device_token = None
- _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
- _NS_MAP = {
- 'media': 'http://search.yahoo.com/mrss/',
- 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
- }
- _GEO_COUNTRIES = ['CA']
-
- def _call_api(self, path, video_id):
- url = path if path.startswith('http') else self._API_BASE_URL + path
- for _ in range(2):
- try:
- result = self._download_xml(url, video_id, headers={
- 'X-Clearleap-DeviceId': self._device_id,
- 'X-Clearleap-DeviceToken': self._device_token,
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
- # Device token has expired, re-acquiring device token
- self._register_device()
- continue
- raise
- error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
- if error_message:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
- return result
-
- def _real_initialize(self):
- if self._valid_device_token():
- return
- device = self._downloader.cache.load('cbcwatch', 'device') or {}
- self._device_id, self._device_token = device.get('id'), device.get('token')
- if self._valid_device_token():
- return
- self._register_device()
-
- def _valid_device_token(self):
- return self._device_id and self._device_token
-
- def _register_device(self):
- self._device_id = self._device_token = None
- result = self._download_xml(
- self._API_BASE_URL + 'device/register',
- None, 'Acquiring device token',
- data=b'<device><type>web</type></device>')
- self._device_id = xpath_text(result, 'deviceId', fatal=True)
- self._device_token = xpath_text(result, 'deviceToken', fatal=True)
- self._downloader.cache.store(
- 'cbcwatch', 'device', {
- 'id': self._device_id,
- 'token': self._device_token,
- })
-
- def _parse_rss_feed(self, rss):
- channel = xpath_element(rss, 'channel', fatal=True)
-
- def _add_ns(path):
- return xpath_with_ns(path, self._NS_MAP)
-
- entries = []
- for item in channel.findall('item'):
- guid = xpath_text(item, 'guid', fatal=True)
- title = xpath_text(item, 'title', fatal=True)
-
- media_group = xpath_element(item, _add_ns('media:group'), fatal=True)
- content = xpath_element(media_group, _add_ns('media:content'), fatal=True)
- content_url = content.attrib['url']
-
- thumbnails = []
- for thumbnail in media_group.findall(_add_ns('media:thumbnail')):
- thumbnail_url = thumbnail.get('url')
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'id': thumbnail.get('profile'),
- 'url': thumbnail_url,
- 'width': int_or_none(thumbnail.get('width')),
- 'height': int_or_none(thumbnail.get('height')),
- })
-
- timestamp = None
- release_date = find_xpath_attr(
- item, _add_ns('media:credit'), 'role', 'releaseDate')
- if release_date is not None:
- timestamp = parse_iso8601(release_date.text)
-
- entries.append({
- '_type': 'url_transparent',
- 'url': content_url,
- 'id': guid,
- 'title': title,
- 'description': xpath_text(item, 'description'),
- 'timestamp': timestamp,
- 'duration': int_or_none(content.get('duration')),
- 'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))),
- 'episode': xpath_text(item, _add_ns('clearleap:episode')),
- 'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))),
- 'series': xpath_text(item, _add_ns('clearleap:series')),
- 'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))),
- 'thumbnails': thumbnails,
- 'ie_key': 'CBCWatchVideo',
- })
-
- return self.playlist_result(
- entries, xpath_text(channel, 'guid'),
- xpath_text(channel, 'title'),
- xpath_text(channel, 'description'))
-
-
-class CBCWatchVideoIE(CBCWatchBaseIE):
- IE_NAME = 'cbc.ca:watch:video'
- _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
- _TEST = {
- # geo-restricted to Canada, bypassable
- 'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
- 'only_matching': True,
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- result = self._call_api(url, video_id)
-
- m3u8_url = xpath_text(result, 'url', fatal=True)
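- # Request '<dir>/<dir>.m3u8' instead of the advertised playlist,
- # which tends to expose the full variant list; fall back to the
- # original URL when that yields fewer than two formats.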
- formats = self._extract_m3u8_formats(re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url), video_id, 'mp4', fatal=False)
- if len(formats) < 2:
- formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
- for f in formats:
- format_id = f.get('format_id')
- if format_id.startswith('AAC'):
- f['acodec'] = 'aac'
- elif format_id.startswith('AC3'):
- f['acodec'] = 'ac-3'
- self._sort_formats(formats)
-
- info = {
- 'id': video_id,
- 'title': video_id,
- 'formats': formats,
- }
-
- rss = xpath_element(result, 'rss')
- if rss:
- info.update(self._parse_rss_feed(rss)['entries'][0])
- del info['url']
- del info['_type']
- del info['ie_key']
- return info
-
-
-class CBCWatchIE(CBCWatchBaseIE):
- IE_NAME = 'cbc.ca:watch'
- _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
- _TESTS = [{
- # geo-restricted to Canada, bypassable
- 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
- 'info_dict': {
- 'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
- 'ext': 'mp4',
- 'title': 'Customer (Dis)Service',
- 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
- 'upload_date': '20160219',
- 'timestamp': 1455840000,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- 'format': 'bestvideo',
- },
- }, {
- # geo-restricted to Canada, bypassable
- 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
- 'info_dict': {
- 'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
- 'title': 'Arthur',
- 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
- },
- 'playlist_mincount': 30,
- }, {
- 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- rss = self._call_api('web/browse/' + video_id, video_id)
- return self._parse_rss_feed(rss)
-
-
-class CBCOlympicsIE(InfoExtractor):
- IE_NAME = 'cbc.ca:olympics'
- _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
- _TESTS = [{
- 'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_id = self._hidden_inputs(webpage)['videoId']
- video_doc = self._download_xml(
- 'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
- title = xpath_text(video_doc, 'title', fatal=True)
- is_live = xpath_text(video_doc, 'kind') == 'Live'
- if is_live:
- title = self._live_title(title)
-
- formats = []
- for video_source in video_doc.findall('videoSources/videoSource'):
- uri = xpath_text(video_source, 'uri')
- if not uri:
- continue
- tokenize = self._download_json(
- 'https://olympics.cbc.ca/api/api-akamai/tokenize',
- video_id, data=json.dumps({
- 'VideoSource': uri,
- }).encode(), headers={
- 'Content-Type': 'application/json',
- 'Referer': url,
- # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
- 'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie
- }, fatal=False)
- if not tokenize:
- continue
- content_url = tokenize['ContentUrl']
- video_source_format = video_source.get('format')
- if video_source_format == 'IIS':
- formats.extend(self._extract_ism_formats(
- content_url, video_id, ism_id=video_source_format, fatal=False))
- else:
- formats.extend(self._extract_m3u8_formats(
- content_url, video_id, 'mp4',
- 'm3u8' if is_live else 'm3u8_native',
- m3u8_id=video_source_format, fatal=False))
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': xpath_text(video_doc, 'description'),
- 'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
- 'duration': parse_duration(xpath_text(video_doc, 'duration')),
- 'formats': formats,
- 'is_live': is_live,
- }
diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py
deleted file mode 100644
index 1ec58f7d8..000000000
--- a/youtube_dl/extractor/ceskatelevize.py
+++ /dev/null
@@ -1,287 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse,
-)
-from ..utils import (
- ExtractorError,
- float_or_none,
- sanitized_Request,
- unescapeHTML,
- update_url_query,
- urlencode_postdata,
- USER_AGENTS,
-)
-
-
-class CeskaTelevizeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
- _TESTS = [{
- 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
- 'info_dict': {
- 'id': '61924494877246241',
- 'ext': 'mp4',
- 'title': 'Hyde Park Civilizace: Život v Grónsku',
- 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 3350,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
- 'info_dict': {
- 'id': '61924494877028507',
- 'ext': 'mp4',
- 'title': 'Hyde Park Civilizace: Bonus 01 - En',
- 'description': 'English Subtittles',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 81.3,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- # live stream
- 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
- 'info_dict': {
- 'id': 402,
- 'ext': 'mp4',
- 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
- 'is_live': True,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'Georestricted to Czech Republic',
- }, {
- 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
- if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
- raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
-
- type_ = None
- episode_id = None
-
- playlist = self._parse_json(
- self._search_regex(
- r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
- default='{}'), playlist_id)
- if playlist:
- type_ = playlist.get('type')
- episode_id = playlist.get('id')
-
- if not type_:
- type_ = self._html_search_regex(
- r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
- webpage, 'type')
- if not episode_id:
- episode_id = self._html_search_regex(
- r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
- webpage, 'episode_id')
-
- data = {
- 'playlist[0][type]': type_,
- 'playlist[0][id]': episode_id,
- 'requestUrl': compat_urllib_parse_urlparse(url).path,
- 'requestSource': 'iVysilani',
- }
-
- entries = []
-
- for user_agent in (None, USER_AGENTS['Safari']):
- req = sanitized_Request(
- 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
- data=urlencode_postdata(data))
-
- req.add_header('Content-type', 'application/x-www-form-urlencoded')
- req.add_header('x-addr', '127.0.0.1')
- req.add_header('X-Requested-With', 'XMLHttpRequest')
- if user_agent:
- req.add_header('User-Agent', user_agent)
- req.add_header('Referer', url)
-
- playlistpage = self._download_json(req, playlist_id, fatal=False)
-
- if not playlistpage:
- continue
-
- playlist_url = playlistpage['url']
- if playlist_url == 'error_region':
- raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
-
- req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
- req.add_header('Referer', url)
-
- playlist_title = self._og_search_title(webpage, default=None)
- playlist_description = self._og_search_description(webpage, default=None)
-
- playlist = self._download_json(req, playlist_id, fatal=False)
- if not playlist:
- continue
-
- playlist = playlist.get('playlist')
- if not isinstance(playlist, list):
- continue
-
- playlist_len = len(playlist)
-
- for num, item in enumerate(playlist):
- is_live = item.get('type') == 'LIVE'
- formats = []
- for format_id, stream_url in item.get('streamUrls', {}).items():
- if 'playerType=flash' in stream_url:
- stream_formats = self._extract_m3u8_formats(
- stream_url, playlist_id, 'mp4', 'm3u8_native',
- m3u8_id='hls-%s' % format_id, fatal=False)
- else:
- stream_formats = self._extract_mpd_formats(
- stream_url, playlist_id,
- mpd_id='dash-%s' % format_id, fatal=False)
- # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
- if format_id == 'audioDescription':
- for f in stream_formats:
- f['source_preference'] = -10
- formats.extend(stream_formats)
-
- if user_agent and len(entries) == playlist_len:
- entries[num]['formats'].extend(formats)
- continue
-
- item_id = item.get('id') or item['assetId']
- title = item['title']
-
- duration = float_or_none(item.get('duration'))
- thumbnail = item.get('previewImageUrl')
-
- subtitles = {}
- if item.get('type') == 'VOD':
- subs = item.get('subtitles')
- if subs:
- subtitles = self.extract_subtitles(episode_id, subs)
-
- if playlist_len == 1:
- final_title = playlist_title or title
- if is_live:
- final_title = self._live_title(final_title)
- else:
- final_title = '%s (%s)' % (playlist_title, title)
-
- entries.append({
- 'id': item_id,
- 'title': final_title,
- 'description': playlist_description if playlist_len == 1 else None,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- 'is_live': is_live,
- })
-
- for e in entries:
- self._sort_formats(e['formats'])
-
- return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
-
- def _get_subtitles(self, episode_id, subs):
- original_subtitles = self._download_webpage(
- subs[0]['url'], episode_id, 'Downloading subtitles')
- srt_subs = self._fix_subtitles(original_subtitles)
- return {
- 'cs': [{
- 'ext': 'srt',
- 'data': srt_subs,
- }]
- }
-
- @staticmethod
- def _fix_subtitles(subtitles):
- """ Convert millisecond-based subtitles to SRT """
-
- def _msectotimecode(msec):
- """ Helper utility to convert milliseconds to timecode """
- components = []
- for divider in [1000, 60, 60, 100]:
- components.append(msec % divider)
- msec //= divider
- return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)
-
- def _fix_subtitle(subtitle):
- for line in subtitle.splitlines():
- m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
- if m:
- yield m.group(1)
- start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
- yield '{0} --> {1}'.format(start, stop)
- else:
- yield line
-
- return '\r\n'.join(_fix_subtitle(subtitles))
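The timecode helper above collects remainders least-significant first (milliseconds, seconds, minutes, hours) and prints them in reverse; a standalone sketch of the same arithmetic with a couple of worked values:

    def msec_to_timecode(msec):
        # Same mod/div chain as _msectotimecode above: each pass peels off
        # one unit (msec, then seconds, then minutes, capping hours at 99).
        components = []
        for divider in (1000, 60, 60, 100):
            components.append(msec % divider)
            msec //= divider
        return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)

    assert msec_to_timecode(3661500) == '01:01:01,500'  # 1 h 1 min 1.5 s
    assert msec_to_timecode(81300) == '00:01:21,300'    # 81.3 s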
-
-
-class CeskaTelevizePoradyIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
- _TESTS = [{
- # video with 18+ caution trailer
- 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
- 'info_dict': {
- 'id': '215562210900007-bogotart',
- 'title': 'Queer: Bogotart',
- 'description': 'Alternativní průvodce současným queer světem',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '61924494876844842',
- 'ext': 'mp4',
- 'title': 'Queer: Bogotart (Varování 18+)',
- 'duration': 10.2,
- },
- }, {
- 'info_dict': {
- 'id': '61924494877068022',
- 'ext': 'mp4',
- 'title': 'Queer: Bogotart (Queer)',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 1558.3,
- },
- }],
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- # iframe embed
- 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- data_url = update_url_query(unescapeHTML(self._search_regex(
- (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
- webpage, 'iframe player url', group='url')), query={
- 'autoStart': 'true',
- })
-
- return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
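The loop over (None, USER_AGENTS['Safari']) in CeskaTelevizeIE above fetches the playlist twice because the site serves different stream flavours per client; on the second pass the newly found formats are folded into the entries built on the first. A hedged sketch of that merge pattern, with fetch_playlist and extract_formats as hypothetical helpers:

    entries = []
    for user_agent in (None, 'Safari'):
        playlist = fetch_playlist(user_agent)      # hypothetical helper
        for num, item in enumerate(playlist):
            formats = extract_formats(item)        # hypothetical helper
            if user_agent and len(entries) == len(playlist):
                # Second pass: same items, just more formats per entry.
                entries[num]['formats'].extend(formats)
                continue
            entries.append({'id': item['id'], 'formats': formats})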
diff --git a/youtube_dl/extractor/channel9.py b/youtube_dl/extractor/channel9.py
deleted file mode 100644
index 81108e704..000000000
--- a/youtube_dl/extractor/channel9.py
+++ /dev/null
@@ -1,262 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- ExtractorError,
- int_or_none,
- parse_iso8601,
- qualities,
- unescapeHTML,
-)
-
-
-class Channel9IE(InfoExtractor):
- IE_DESC = 'Channel 9'
- IE_NAME = 'channel9'
- _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
-
- _TESTS = [{
- 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
- 'md5': '32083d4eaf1946db6d454313f44510ca',
- 'info_dict': {
- 'id': '6c413323-383a-49dc-88f9-a22800cab024',
- 'ext': 'wmv',
- 'title': 'Developer Kick-Off Session: Stuff We Love',
- 'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
- 'duration': 4576,
- 'thumbnail': r're:https?://.*\.jpg',
- 'timestamp': 1377717420,
- 'upload_date': '20130828',
- 'session_code': 'KOS002',
- 'session_room': 'Arena 1A',
- 'session_speakers': ['Andrew Coates', 'Brady Gaster', 'Mads Kristensen', 'Ed Blankenship', 'Patrick Klug'],
- },
- }, {
- 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
- 'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
- 'info_dict': {
- 'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
- 'ext': 'wmv',
- 'title': 'Self-service BI with Power BI - nuclear testing',
- 'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
- 'duration': 1540,
- 'thumbnail': r're:https?://.*\.jpg',
- 'timestamp': 1386381991,
- 'upload_date': '20131207',
- 'authors': ['Mike Wilmot'],
- },
- }, {
- # low quality mp4 is best
- 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
- 'info_dict': {
- 'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
- 'ext': 'mp4',
- 'title': 'Ranges for the Standard Library',
- 'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
- 'duration': 5646,
- 'thumbnail': r're:https?://.*\.jpg',
- 'upload_date': '20150930',
- 'timestamp': 1443640735,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
- 'info_dict': {
- 'id': 'Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b',
- 'title': 'Channel 9',
- },
- 'playlist_mincount': 100,
- }, {
- 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
- 'only_matching': True,
- }, {
- 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
- 'only_matching': True,
- }]
-
- _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
- webpage)
-
- def _extract_list(self, video_id, rss_url=None):
- if not rss_url:
- rss_url = self._RSS_URL % video_id
- rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
- entries = [self.url_result(session_url.text, 'Channel9')
- for session_url in rss.findall('./channel/item/link')]
- title_text = rss.find('./channel/title').text
- return self.playlist_result(entries, video_id, title_text)
-
- def _real_extract(self, url):
- content_path, rss = re.match(self._VALID_URL, url).groups()
-
- if rss:
- return self._extract_list(content_path, url)
-
- webpage = self._download_webpage(
- url, content_path, 'Downloading web page')
-
- episode_data = self._search_regex(
- r"data-episode='([^']+)'", webpage, 'episode data', default=None)
- if episode_data:
- episode_data = self._parse_json(unescapeHTML(
- episode_data), content_path)
- content_id = episode_data['contentId']
- is_session = '/Sessions(' in episode_data['api']
- content_url = 'https://channel9.msdn.com/odata' + episode_data['api']
- if is_session:
- content_url += '?$expand=Speakers'
- else:
- content_url += '?$expand=Authors'
- content_data = self._download_json(content_url, content_id)
- title = content_data['Title']
-
- QUALITIES = (
- 'mp3',
- 'wmv', 'mp4',
- 'wmv-low', 'mp4-low',
- 'wmv-mid', 'mp4-mid',
- 'wmv-high', 'mp4-high',
- )
-
- quality_key = qualities(QUALITIES)
-
- def quality(quality_id, format_url):
- return (len(QUALITIES) if '_Source.' in format_url
- else quality_key(quality_id))
-
- formats = []
- urls = set()
-
- SITE_QUALITIES = {
- 'MP3': 'mp3',
- 'MP4': 'mp4',
- 'Low Quality WMV': 'wmv-low',
- 'Low Quality MP4': 'mp4-low',
- 'Mid Quality WMV': 'wmv-mid',
- 'Mid Quality MP4': 'mp4-mid',
- 'High Quality WMV': 'wmv-high',
- 'High Quality MP4': 'mp4-high',
- }
-
- formats_select = self._search_regex(
- r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
- 'formats select', default=None)
- if formats_select:
- for mobj in re.finditer(
- r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
- formats_select):
- format_url = mobj.group('url')
- if format_url in urls:
- continue
- urls.add(format_url)
- format_id = mobj.group('format')
- quality_id = SITE_QUALITIES.get(format_id, format_id)
- formats.append({
- 'url': format_url,
- 'format_id': quality_id,
- 'quality': quality(quality_id, format_url),
- 'vcodec': 'none' if quality_id == 'mp3' else None,
- })
-
- API_QUALITIES = {
- 'VideoMP4Low': 'mp4-low',
- 'VideoWMV': 'wmv-mid',
- 'VideoMP4Medium': 'mp4-mid',
- 'VideoMP4High': 'mp4-high',
- 'VideoWMVHQ': 'wmv-hq',
- }
-
- for format_id, q in API_QUALITIES.items():
- q_url = content_data.get(format_id)
- if not q_url or q_url in urls:
- continue
- urls.add(q_url)
- formats.append({
- 'url': q_url,
- 'format_id': q,
- 'quality': quality(q, q_url),
- })
-
- self._sort_formats(formats)
-
- slides = content_data.get('Slides')
- zip_file = content_data.get('ZipFile')
-
- if not formats and not slides and not zip_file:
- raise ExtractorError(
- 'None of recording, slides or zip are available for %s' % content_path)
-
- subtitles = {}
- for caption in content_data.get('Captions', []):
- caption_url = caption.get('Url')
- if not caption_url:
- continue
- subtitles.setdefault(caption.get('Language', 'en'), []).append({
- 'url': caption_url,
- 'ext': 'vtt',
- })
-
- common = {
- 'id': content_id,
- 'title': title,
- 'description': clean_html(content_data.get('Description') or content_data.get('Body')),
- 'thumbnail': content_data.get('Thumbnail') or content_data.get('VideoPlayerPreviewImage'),
- 'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
- 'timestamp': parse_iso8601(content_data.get('PublishedDate')),
- 'avg_rating': int_or_none(content_data.get('Rating')),
- 'rating_count': int_or_none(content_data.get('RatingCount')),
- 'view_count': int_or_none(content_data.get('Views')),
- 'comment_count': int_or_none(content_data.get('CommentCount')),
- 'subtitles': subtitles,
- }
- if is_session:
- speakers = []
- for s in content_data.get('Speakers', []):
- speaker_name = s.get('FullName')
- if not speaker_name:
- continue
- speakers.append(speaker_name)
-
- common.update({
- 'session_code': content_data.get('Code'),
- 'session_room': content_data.get('Room'),
- 'session_speakers': speakers,
- })
- else:
- authors = []
- for a in content_data.get('Authors', []):
- author_name = a.get('DisplayName')
- if not author_name:
- continue
- authors.append(author_name)
- common['authors'] = authors
-
- contents = []
-
- if slides:
- d = common.copy()
- d.update({'title': title + '-Slides', 'url': slides})
- contents.append(d)
-
- if zip_file:
- d = common.copy()
- d.update({'title': title + '-Zip', 'url': zip_file})
- contents.append(d)
-
- if formats:
- d = common.copy()
- d.update({'title': title, 'formats': formats})
- contents.append(d)
- return self.playlist_result(contents)
- else:
- return self._extract_list(content_path)
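The quality() wrapper above ranks format ids by their position in QUALITIES via youtube_dl.utils.qualities, and lifts any '_Source.' URL above the whole ladder; a minimal sketch of that ordering:

    from youtube_dl.utils import qualities

    QUALITIES = ('mp3', 'wmv', 'mp4', 'wmv-low', 'mp4-low',
                 'wmv-mid', 'mp4-mid', 'wmv-high', 'mp4-high')
    quality_key = qualities(QUALITIES)

    def quality(quality_id, format_url):
        # Source/original uploads outrank every transcoded rendition.
        return (len(QUALITIES) if '_Source.' in format_url
                else quality_key(quality_id))

    assert quality('mp4-high', 'a.mp4') > quality('mp4-low', 'b.mp4')
    assert quality('wmv', 'Talk_Source.wmv') == len(QUALITIES)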
diff --git a/youtube_dl/extractor/chaturbate.py b/youtube_dl/extractor/chaturbate.py
deleted file mode 100644
index 656e715ae..000000000
--- a/youtube_dl/extractor/chaturbate.py
+++ /dev/null
@@ -1,81 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class ChaturbateIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
- _TESTS = [{
- 'url': 'https://www.chaturbate.com/siswet19/',
- 'info_dict': {
- 'id': 'siswet19',
- 'ext': 'mp4',
- 'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'age_limit': 18,
- 'is_live': True,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Room is offline',
- }, {
- 'url': 'https://chaturbate.com/fullvideo/?b=caylin',
- 'only_matching': True,
- }, {
- 'url': 'https://en.chaturbate.com/siswet19/',
- 'only_matching': True,
- }]
-
- _ROOM_OFFLINE = 'Room is currently offline'
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://chaturbate.com/%s/' % video_id, video_id,
- headers=self.geo_verification_headers())
-
- m3u8_urls = []
-
- for m in re.finditer(
- r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
- m3u8_fast_url, m3u8_no_fast_url = m.group('url'), m.group(
- 'url').replace('_fast', '')
- for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
- if m3u8_url not in m3u8_urls:
- m3u8_urls.append(m3u8_url)
-
- if not m3u8_urls:
- error = self._search_regex(
- [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
- r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
- webpage, 'error', group='error', default=None)
- if not error:
- if any(p in webpage for p in (
- self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
- error = self._ROOM_OFFLINE
- if error:
- raise ExtractorError(error, expected=True)
- raise ExtractorError('Unable to find stream URL')
-
- formats = []
- for m3u8_url in m3u8_urls:
- m3u8_id = 'fast' if '_fast' in m3u8_url else 'slow'
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, ext='mp4',
- # ffmpeg skips segments for fast m3u8
- preference=-10 if m3u8_id == 'fast' else None,
- m3u8_id=m3u8_id, fatal=False, live=True))
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': self._live_title(video_id),
- 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id,
- 'age_limit': self._rta_search(webpage),
- 'is_live': True,
- 'formats': formats,
- }
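Every playlist URL found on the page is paired with its non-'_fast' twin, and the fast edge is later deprioritized (preference -10) because ffmpeg tends to skip segments on it; the pairing in isolation (URL hypothetical):

    m3u8_urls = []
    for found_url in ['https://edge.example.com/live_fast.m3u8']:
        for m3u8_url in (found_url, found_url.replace('_fast', '')):
            if m3u8_url not in m3u8_urls:
                m3u8_urls.append(m3u8_url)
    assert m3u8_urls == ['https://edge.example.com/live_fast.m3u8',
                         'https://edge.example.com/live.m3u8']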
diff --git a/youtube_dl/extractor/cloudflarestream.py b/youtube_dl/extractor/cloudflarestream.py
deleted file mode 100644
index 8ff2c6531..000000000
--- a/youtube_dl/extractor/cloudflarestream.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class CloudflareStreamIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:watch\.)?(?:cloudflarestream\.com|videodelivery\.net)/|
- embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=
- )
- (?P<id>[\da-f]+)
- '''
- _TESTS = [{
- 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
- 'info_dict': {
- 'id': '31c9291ab41fac05471db4e73aa11717',
- 'ext': 'mp4',
- 'title': '31c9291ab41fac05471db4e73aa11717',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
- 'only_matching': True,
- }, {
- 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
- 'only_matching': True,
- }, {
- 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.(?:cloudflarestream\.com|videodelivery\.net)/embed/[^/]+\.js\?.*?\bvideo=[\da-f]+?.*?)\1',
- webpage)]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- formats = self._extract_m3u8_formats(
- 'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id,
- video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls',
- fatal=False)
- formats.extend(self._extract_mpd_formats(
- 'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id,
- video_id, mpd_id='dash', fatal=False))
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': video_id,
- 'formats': formats,
- }
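Since both manifests derive purely from the video id, no page download is needed; a sketch of the two URLs the extractor above tries (id taken from the test case):

    video_id = '31c9291ab41fac05471db4e73aa11717'
    hls_url = 'https://cloudflarestream.com/%s/manifest/video.m3u8' % video_id
    dash_url = 'https://cloudflarestream.com/%s/manifest/video.mpd' % video_id
    # HLS and DASH formats from the two manifests are merged and sorted;
    # each fetch is fatal=False so one missing manifest is not fatal.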
diff --git a/youtube_dl/extractor/comcarcoff.py b/youtube_dl/extractor/comcarcoff.py
deleted file mode 100644
index 588aad0d9..000000000
--- a/youtube_dl/extractor/comcarcoff.py
+++ /dev/null
@@ -1,74 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- parse_duration,
- parse_iso8601,
-)
-
-
-class ComCarCoffIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
- _TESTS = [{
- 'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
- 'info_dict': {
- 'id': '2494164',
- 'ext': 'mp4',
- 'upload_date': '20141127',
- 'timestamp': 1417107600,
- 'duration': 1232,
- 'title': 'Happy Thanksgiving Miranda',
- 'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
- },
- 'params': {
- 'skip_download': 'requires ffmpeg',
- }
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- if not display_id:
- display_id = 'comediansincarsgettingcoffee.com'
- webpage = self._download_webpage(url, display_id)
-
- full_data = self._parse_json(
- self._search_regex(
- r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
- display_id)['videoData']
-
- display_id = full_data['activeVideo']['video']
- video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
-
- video_id = compat_str(video_data['mediaId'])
- title = video_data['title']
- formats = self._extract_m3u8_formats(
- video_data['mediaUrl'], video_id, 'mp4')
- self._sort_formats(formats)
-
- thumbnails = [{
- 'url': video_data['images']['thumb'],
- }, {
- 'url': video_data['images']['poster'],
- }]
-
- timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
- video_data.get('pubDate'))
- duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
- video_data.get('duration'))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': video_data.get('description'),
- 'timestamp': timestamp,
- 'duration': duration,
- 'thumbnails': thumbnails,
- 'formats': formats,
- 'season_number': int_or_none(video_data.get('season')),
- 'episode_number': int_or_none(video_data.get('episode')),
- 'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
- }
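The timestamp and duration lookups above try a numeric field first and fall back to parsing a textual one; the same pattern in isolation, with a hypothetical video_data chosen to match the test expectations:

    from youtube_dl.utils import int_or_none, parse_duration, parse_iso8601

    video_data = {'pubDate': '2014-11-27T12:00:00-0500', 'duration': '20:32'}
    timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
        video_data.get('pubDate'))
    duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
        video_data.get('duration'))
    assert timestamp == 1417107600 and duration == 1232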
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
deleted file mode 100644
index 50d48c40d..000000000
--- a/youtube_dl/extractor/common.py
+++ /dev/null
@@ -1,2974 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import base64
-import datetime
-import hashlib
-import json
-import netrc
-import os
-import random
-import re
-import socket
-import sys
-import time
-import math
-
-from ..compat import (
- compat_cookiejar,
- compat_cookies,
- compat_etree_Element,
- compat_etree_fromstring,
- compat_getpass,
- compat_integer_types,
- compat_http_client,
- compat_os_name,
- compat_str,
- compat_urllib_error,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
- compat_urllib_request,
- compat_urlparse,
- compat_xml_parse_error,
-)
-from ..downloader.f4m import (
- get_base_url,
- remove_encrypted_media,
-)
-from ..utils import (
- NO_DEFAULT,
- age_restricted,
- base_url,
- bug_reports_message,
- clean_html,
- compiled_regex_type,
- determine_ext,
- determine_protocol,
- dict_get,
- error_to_compat_str,
- ExtractorError,
- extract_attributes,
- fix_xml_ampersands,
- float_or_none,
- GeoRestrictedError,
- GeoUtils,
- int_or_none,
- js_to_json,
- JSON_LD_RE,
- mimetype2ext,
- orderedSet,
- parse_bitrate,
- parse_codecs,
- parse_duration,
- parse_iso8601,
- parse_m3u8_attributes,
- parse_resolution,
- RegexNotFoundError,
- sanitized_Request,
- sanitize_filename,
- str_or_none,
- strip_or_none,
- unescapeHTML,
- unified_strdate,
- unified_timestamp,
- update_Request,
- update_url_query,
- urljoin,
- url_basename,
- url_or_none,
- xpath_element,
- xpath_text,
- xpath_with_ns,
-)
-
-
-class InfoExtractor(object):
- """Information Extractor class.
-
- Information extractors are the classes that, given a URL, extract
- information about the video (or videos) the URL refers to. This
- information includes the real video URL, the video title, author and
- others. The information is stored in a dictionary which is then
- passed to the YoutubeDL. The YoutubeDL processes this
- information, possibly downloading the video to the file system, among
- other possible outcomes.
-
- The type field determines the type of the result.
- By far the most common value (and the default if _type is missing) is
- "video", which indicates a single video.
-
- For a video, the dictionaries must include the following fields:
-
- id: Video identifier.
- title: Video title, unescaped.
-
- Additionally, it must contain either a formats entry or a url one:
-
- formats: A list of dictionaries for each format available, ordered
- from worst to best quality.
-
- Potential fields:
- * url The mandatory URL representing the media:
- for plain file media - HTTP URL of this file,
- for RTMP - RTMP URL,
- for HLS - URL of the M3U8 media playlist,
- for HDS - URL of the F4M manifest,
- for DASH
- - HTTP URL to plain file media (in case of
- unfragmented media)
- - URL of the MPD manifest or base URL
- representing the media if MPD manifest
- is parsed from a string (in case of
- fragmented media)
- for MSS - URL of the ISM manifest.
- * manifest_url
- The URL of the manifest file in case of
- fragmented media:
- for HLS - URL of the M3U8 master playlist,
- for HDS - URL of the F4M manifest,
- for DASH - URL of the MPD manifest,
- for MSS - URL of the ISM manifest.
- * ext Will be calculated from URL if missing
- * format A human-readable description of the format
- ("mp4 container with h264/opus").
- Calculated from the format_id, width, height
- and format_note fields if missing.
- * format_id A short description of the format
- ("mp4_h264_opus" or "19").
- Technically optional, but strongly recommended.
- * format_note Additional info about the format
- ("3D" or "DASH video")
- * width Width of the video, if known
- * height Height of the video, if known
- * resolution Textual description of width and height
- * tbr Average bitrate of audio and video in KBit/s
- * abr Average audio bitrate in KBit/s
- * acodec Name of the audio codec in use
- * asr Audio sampling rate in Hertz
- * vbr Average video bitrate in KBit/s
- * fps Frame rate
- * vcodec Name of the video codec in use
- * container Name of the container format
- * filesize The number of bytes, if known in advance
- * filesize_approx An estimate for the number of bytes
- * player_url SWF Player URL (used for rtmpdump).
- * protocol The protocol that will be used for the actual
- download, lower-case.
- "http", "https", "rtsp", "rtmp", "rtmpe",
- "m3u8", "m3u8_native" or "http_dash_segments".
- * fragment_base_url
- Base URL for fragments. Each fragment's path
- value (if present) will be relative to
- this URL.
- * fragments A list of fragments of a fragmented media.
- Each fragment entry must contain either a url
- or a path. If a url is present it should be
- used by the client. Otherwise both path and
- fragment_base_url must be present. Here is
- the list of all potential fields:
- * "url" - fragment's URL
- * "path" - fragment's path relative to
- fragment_base_url
- * "duration" (optional, int or float)
- * "filesize" (optional, int)
- * preference Order number of this format. If this field is
- present and not None, the formats get sorted
- by this field, regardless of all other values.
- -1 for default (order by other properties),
- -2 or smaller for less than default.
- < -1000 to hide the format (if there is
- another one which is strictly better)
- * language Language code, e.g. "de" or "en-US".
- * language_preference Is this in the language mentioned in
- the URL?
- 10 if it's what the URL is about,
- -1 for default (don't know),
- -10 otherwise, other values reserved for now.
- * quality Order number of the video quality of this
- format, irrespective of the file format.
- -1 for default (order by other properties),
- -2 or smaller for less than default.
- * source_preference Order number for this video source
- (quality takes higher priority)
- -1 for default (order by other properties),
- -2 or smaller for less than default.
- * http_headers A dictionary of additional HTTP headers
- to add to the request.
- * stretched_ratio If given and not 1, indicates that the
- video's pixels are not square.
- width : height ratio as float.
- * no_resume The server does not support resuming the
- (HTTP or RTMP) download. Boolean.
- * downloader_options A dictionary of downloader options as
- described in FileDownloader
-
- url: Final video URL.
- ext: Video filename extension.
- format: The video format, defaults to ext (used for --get-format)
- player_url: SWF Player URL (used for rtmpdump).
-
- The following fields are optional:
-
- alt_title: A secondary title of the video.
- display_id: An alternative identifier for the video, not necessarily
- unique, but available before title. Typically, id is
- something like "4234987", title "Dancing naked mole rats",
- and display_id "dancing-naked-mole-rats"
- thumbnails: A list of dictionaries, with the following entries:
- * "id" (optional, string) - Thumbnail format ID
- * "url"
- * "preference" (optional, int) - quality of the image
- * "width" (optional, int)
- * "height" (optional, int)
- * "resolution" (optional, string "{width}x{height}",
- deprecated)
- * "filesize" (optional, int)
- thumbnail: Full URL to a video thumbnail image.
- description: Full video description.
- uploader: Full name of the video uploader.
- license: License name the video is licensed under.
- creator: The creator of the video.
- release_date: The date (YYYYMMDD) when the video was released.
- timestamp: UNIX timestamp of the moment the video became available.
- upload_date: Video upload date (YYYYMMDD).
- If not explicitly set, calculated from timestamp.
- uploader_id: Nickname or id of the video uploader.
- uploader_url: Full URL to a personal webpage of the video uploader.
- channel: Full name of the channel the video is uploaded on.
- Note that channel fields may or may not repeat uploader
- fields. This depends on a particular extractor.
- channel_id: Id of the channel.
- channel_url: Full URL to a channel webpage.
- location: Physical location where the video was filmed.
- subtitles: The available subtitles as a dictionary in the format
- {tag: subformats}. "tag" is usually a language code, and
- "subformats" is a list sorted from lower to higher
- preference, each element is a dictionary with the "ext"
- entry and one of:
- * "data": The subtitles file contents
- * "url": A URL pointing to the subtitles file
- "ext" will be calculated from URL if missing
- automatic_captions: Like 'subtitles', used by the YoutubeIE for
- automatically generated captions
- duration: Length of the video in seconds, as an integer or float.
- view_count: How many users have watched the video on the platform.
- like_count: Number of positive ratings of the video
- dislike_count: Number of negative ratings of the video
- repost_count: Number of reposts of the video
- average_rating: Average rating given by users; the scale used depends on the webpage
- comment_count: Number of comments on the video
- comments: A list of comments, each with one or more of the following
- properties (all but one of text or html optional):
- * "author" - human-readable name of the comment author
- * "author_id" - user ID of the comment author
- * "id" - Comment ID
- * "html" - Comment as HTML
- * "text" - Plain text of the comment
- * "timestamp" - UNIX timestamp of comment
- * "parent" - ID of the comment this one is replying to.
- Set to "root" to indicate that this is a
- comment to the original video.
- age_limit: Age restriction for the video, as an integer (years)
- webpage_url: The URL to the video webpage; if given to youtube-dl it
- should allow getting the same result again. (It will be set
- by YoutubeDL if it's missing)
- categories: A list of categories that the video falls in, for example
- ["Sports", "Berlin"]
- tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
- is_live: True, False, or None (=unknown). Whether this video is a
- live stream that goes on instead of a fixed-length video.
- start_time: Time in seconds where the reproduction should start, as
- specified in the URL.
- end_time: Time in seconds where the reproduction should end, as
- specified in the URL.
- chapters: A list of dictionaries, with the following entries:
- * "start_time" - The start time of the chapter in seconds
- * "end_time" - The end time of the chapter in seconds
- * "title" (optional, string)
-
- The following fields should only be used when the video belongs to some logical
- chapter or section:
-
- chapter: Name or title of the chapter the video belongs to.
- chapter_number: Number of the chapter the video belongs to, as an integer.
- chapter_id: Id of the chapter the video belongs to, as a unicode string.
-
- The following fields should only be used when the video is an episode of some
- series, programme or podcast:
-
- series: Title of the series or programme the video episode belongs to.
- season: Title of the season the video episode belongs to.
- season_number: Number of the season the video episode belongs to, as an integer.
- season_id: Id of the season the video episode belongs to, as a unicode string.
- episode: Title of the video episode. Unlike mandatory video title field,
- this field should denote the exact title of the video episode
- without any kind of decoration.
- episode_number: Number of the video episode within a season, as an integer.
- episode_id: Id of the video episode, as a unicode string.
-
- The following fields should only be used when the media is a track or a part of
- a music album:
-
- track: Title of the track.
- track_number: Number of the track within an album or a disc, as an integer.
- track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
- as a unicode string.
- artist: Artist(s) of the track.
- genre: Genre(s) of the track.
- album: Title of the album the track belongs to.
- album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
- album_artist: List of all artists appeared on the album (e.g.
- "Ash Borer / Fell Voices" or "Various Artists", useful for splits
- and compilations).
- disc_number: Number of the disc or other physical medium the track belongs to,
- as an integer.
- release_year: Year (YYYY) when the album was released.
-
- Unless mentioned otherwise, the fields should be Unicode strings.
-
- Unless mentioned otherwise, None is equivalent to absence of information.
-
-
- _type "playlist" indicates multiple videos.
- There must be a key "entries", which is a list, an iterable, or a PagedList
- object, each element of which is a valid dictionary by this specification.
-
- Additionally, playlists can have "id", "title", "description", "uploader",
- "uploader_id", "uploader_url" attributes with the same semantics as videos
- (see above).
-
-
- _type "multi_video" indicates that there are multiple videos that
- form a single show, for example multiple acts of an opera or TV episode.
- It must have an entries key like a playlist and contain all the keys
- required for a video at the same time.
-
-
- _type "url" indicates that the video must be extracted from another
- location, possibly by a different extractor. Its only required key is:
- "url" - the next URL to extract.
- The key "ie_key" can be set to the class name (minus the trailing "IE",
- e.g. "Youtube") if the extractor class is known in advance.
- Additionally, the dictionary may have any properties of the resolved entity
- known in advance, for example "title" if the title of the referred video is
- known ahead of time.
-
-
- _type "url_transparent" entities have the same specification as "url", but
- indicate that the given additional information is more precise than the one
- associated with the resolved URL.
- This is useful when a site employs a video service that hosts the video and
- its technical metadata, but that video service does not embed a useful
- title, description etc.
-
-
- Subclasses of this one should re-define the _real_initialize() and
- _real_extract() methods and define a _VALID_URL regexp.
- Probably, they should also be added to the list of extractors.
-
- _GEO_BYPASS attribute may be set to False in order to disable
- geo restriction bypass mechanisms for a particular extractor.
- Though it won't disable explicit geo restriction bypass based on
- country code provided with geo_bypass_country.
-
- _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
- countries for this extractor. One of these countries will be used by
- geo restriction bypass mechanism right away in order to bypass
- geo restriction, of course, if the mechanism is not disabled.
-
- _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
- IP blocks in CIDR notation for this extractor. One of these IP blocks
- will be used by geo restriction bypass mechanism similarly
- to _GEO_COUNTRIES.
-
- Finally, the _WORKING attribute should be set to False for broken IEs
- in order to warn the users and skip the tests.
- """
-
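A minimal sketch of a result dictionary satisfying the mandatory fields described above (all values hypothetical, reusing the docstring's own examples):

    info = {
        'id': '4234987',                     # mandatory
        'title': 'Dancing naked mole rats',  # mandatory, unescaped
        # one of `formats` or `url` is required:
        'formats': [{
            'url': 'https://cdn.example.com/video-720.mp4',
            'format_id': 'http-720',
            'ext': 'mp4',
            'width': 1280,
            'height': 720,
        }],
        'display_id': 'dancing-naked-mole-rats',
        'duration': 123.0,
        'is_live': False,
    }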
- _ready = False
- _downloader = None
- _x_forwarded_for_ip = None
- _GEO_BYPASS = True
- _GEO_COUNTRIES = None
- _GEO_IP_BLOCKS = None
- _WORKING = True
-
- def __init__(self, downloader=None):
- """Constructor. Receives an optional downloader."""
- self._ready = False
- self._x_forwarded_for_ip = None
- self.set_downloader(downloader)
-
- @classmethod
- def suitable(cls, url):
- """Receives a URL and returns True if suitable for this IE."""
-
- # This does not use has/getattr intentionally - we want to know whether
- # we have cached the regexp for *this* class, whereas getattr would also
- # match the superclass
- if '_VALID_URL_RE' not in cls.__dict__:
- cls._VALID_URL_RE = re.compile(cls._VALID_URL)
- return cls._VALID_URL_RE.match(url) is not None
-
- @classmethod
- def _match_id(cls, url):
- if '_VALID_URL_RE' not in cls.__dict__:
- cls._VALID_URL_RE = re.compile(cls._VALID_URL)
- m = cls._VALID_URL_RE.match(url)
- assert m
- return compat_str(m.group('id'))
-
- @classmethod
- def working(cls):
- """Getter method for _WORKING."""
- return cls._WORKING
-
- def initialize(self):
- """Initializes an instance (authentication, etc)."""
- self._initialize_geo_bypass({
- 'countries': self._GEO_COUNTRIES,
- 'ip_blocks': self._GEO_IP_BLOCKS,
- })
- if not self._ready:
- self._real_initialize()
- self._ready = True
-
- def _initialize_geo_bypass(self, geo_bypass_context):
- """
- Initialize geo restriction bypass mechanism.
-
- This method is used to initialize geo bypass mechanism based on faking
- X-Forwarded-For HTTP header. A random country from provided country list
- is selected and a random IP belonging to this country is generated. This
- IP will be passed as X-Forwarded-For HTTP header in all subsequent
- HTTP requests.
-
- This method will be used for initial geo bypass mechanism initialization
- during the instance initialization with _GEO_COUNTRIES and
- _GEO_IP_BLOCKS.
-
- You may also manually call it from extractor's code if geo bypass
- information is not available beforehand (e.g. obtained during
- extraction) or due to some other reason. In this case you should pass
- this information in the geo bypass context passed as the first argument. It
- may contain the following fields:
-
- countries: List of geo unrestricted countries (similar
- to _GEO_COUNTRIES)
- ip_blocks: List of geo unrestricted IP blocks in CIDR notation
- (similar to _GEO_IP_BLOCKS)
-
- """
- if not self._x_forwarded_for_ip:
-
- # Geo bypass mechanism is explicitly disabled by user
- if not self._downloader.params.get('geo_bypass', True):
- return
-
- if not geo_bypass_context:
- geo_bypass_context = {}
-
- # Backward compatibility: previously _initialize_geo_bypass
- # expected a list of countries, some 3rd party code may still use
- # it this way
- if isinstance(geo_bypass_context, (list, tuple)):
- geo_bypass_context = {
- 'countries': geo_bypass_context,
- }
-
- # The whole point of the geo bypass mechanism is to fake the IP
- # sent as the X-Forwarded-For HTTP header based on some IP block or
- # country code.
-
- # Path 1: bypassing based on IP block in CIDR notation
-
- # Explicit IP block specified by user, use it right away
- # regardless of whether extractor is geo bypassable or not
- ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
-
- # Otherwise use random IP block from geo bypass context but only
- # if extractor is known as geo bypassable
- if not ip_block:
- ip_blocks = geo_bypass_context.get('ip_blocks')
- if self._GEO_BYPASS and ip_blocks:
- ip_block = random.choice(ip_blocks)
-
- if ip_block:
- self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] Using fake IP %s as X-Forwarded-For.'
- % self._x_forwarded_for_ip)
- return
-
- # Path 2: bypassing based on country code
-
- # Explicit country code specified by user, use it right away
- # regardless of whether extractor is geo bypassable or not
- country = self._downloader.params.get('geo_bypass_country', None)
-
- # Otherwise use random country code from geo bypass context but
- # only if extractor is known as geo bypassable
- if not country:
- countries = geo_bypass_context.get('countries')
- if self._GEO_BYPASS and countries:
- country = random.choice(countries)
-
- if country:
- self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
- % (self._x_forwarded_for_ip, country.upper()))
-
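Both bypass paths above end in GeoUtils.random_ipv4, which accepts either a CIDR block (path 1) or a two-letter country code (path 2); a brief sketch:

    from youtube_dl.utils import GeoUtils

    # Path 1: a random address inside an explicit CIDR block.
    fake_ip = GeoUtils.random_ipv4('5.56.31.0/24')
    # Path 2: a random address from a country's allocated range.
    fake_ip = GeoUtils.random_ipv4('CZ')
    # Either value is then sent as the X-Forwarded-For header.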
- def extract(self, url):
- """Extracts URL information and returns it in list of dicts."""
- try:
- for _ in range(2):
- try:
- self.initialize()
- ie_result = self._real_extract(url)
- if self._x_forwarded_for_ip:
- ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
- return ie_result
- except GeoRestrictedError as e:
- if self.__maybe_fake_ip_and_retry(e.countries):
- continue
- raise
- except ExtractorError:
- raise
- except compat_http_client.IncompleteRead as e:
- raise ExtractorError('A network error has occurred.', cause=e, expected=True)
- except (KeyError, StopIteration) as e:
- raise ExtractorError('An extractor error has occurred.', cause=e)
-
- def __maybe_fake_ip_and_retry(self, countries):
- if (not self._downloader.params.get('geo_bypass_country', None)
- and self._GEO_BYPASS
- and self._downloader.params.get('geo_bypass', True)
- and not self._x_forwarded_for_ip
- and countries):
- country_code = random.choice(countries)
- self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
- if self._x_forwarded_for_ip:
- self.report_warning(
- 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
- % (self._x_forwarded_for_ip, country_code.upper()))
- return True
- return False
-
- def set_downloader(self, downloader):
- """Sets the downloader for this IE."""
- self._downloader = downloader
-
- def _real_initialize(self):
- """Real initialization process. Redefine in subclasses."""
- pass
-
- def _real_extract(self, url):
- """Real extraction process. Redefine in subclasses."""
- pass
-
- @classmethod
- def ie_key(cls):
- """A string for getting the InfoExtractor with get_info_extractor"""
- return compat_str(cls.__name__[:-2])
-
- @property
- def IE_NAME(self):
- return compat_str(type(self).__name__[:-2])
-
- @staticmethod
- def __can_accept_status_code(err, expected_status):
- assert isinstance(err, compat_urllib_error.HTTPError)
- if expected_status is None:
- return False
- if isinstance(expected_status, compat_integer_types):
- return err.code == expected_status
- elif isinstance(expected_status, (list, tuple)):
- return err.code in expected_status
- elif callable(expected_status):
- return expected_status(err.code) is True
- else:
- assert False
-
- def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
- """
- Return the response handle.
-
- See _download_webpage docstring for arguments specification.
- """
- if note is None:
- self.report_download_webpage(video_id)
- elif note is not False:
- if video_id is None:
- self.to_screen('%s' % (note,))
- else:
- self.to_screen('%s: %s' % (video_id, note))
-
- # Some sites check the X-Forwarded-For HTTP header in order to figure out
- # the origin of the client behind a proxy. This allows bypassing geo
- # restriction by faking this header's value with an IP that belongs to some
- # geo unrestricted country. We will do so once we encounter any
- # geo restriction error.
- if self._x_forwarded_for_ip:
- if 'X-Forwarded-For' not in headers:
- headers['X-Forwarded-For'] = self._x_forwarded_for_ip
-
- if isinstance(url_or_request, compat_urllib_request.Request):
- url_or_request = update_Request(
- url_or_request, data=data, headers=headers, query=query)
- else:
- if query:
- url_or_request = update_url_query(url_or_request, query)
- if data is not None or headers:
- url_or_request = sanitized_Request(url_or_request, data, headers)
- try:
- return self._downloader.urlopen(url_or_request)
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- if isinstance(err, compat_urllib_error.HTTPError):
- if self.__can_accept_status_code(err, expected_status):
- # Retain reference to error to prevent file object from
- # being closed before it can be read. Works around the
- # effects of <https://bugs.python.org/issue15002>
- # introduced in Python 3.4.1.
- err.fp._error = err
- return err.fp
-
- if errnote is False:
- return False
- if errnote is None:
- errnote = 'Unable to download webpage'
-
- errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
- if fatal:
- raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
- else:
- self._downloader.report_warning(errmsg)
- return False
-
- def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
- """
- Return a tuple (page content as string, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- # Strip hashes from the URL (#1038)
- if isinstance(url_or_request, (compat_str, str)):
- url_or_request = url_or_request.partition('#')[0]
-
- urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
- if urlh is False:
- assert not fatal
- return False
- content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
- return (content, urlh)
-
- @staticmethod
- def _guess_encoding_from_content(content_type, webpage_bytes):
- m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
- if m:
- encoding = m.group(1)
- else:
- m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
- webpage_bytes[:1024])
- if m:
- encoding = m.group(1).decode('ascii')
- elif webpage_bytes.startswith(b'\xff\xfe'):
- encoding = 'utf-16'
- else:
- encoding = 'utf-8'
-
- return encoding
-
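The helper above applies a fixed precedence: charset from the Content-Type header, then a <meta charset> within the first 1024 bytes, then a UTF-16 byte-order mark, then UTF-8 as the final fallback. A few spot checks of that order (a sketch; inputs are hypothetical):

    from youtube_dl.extractor.common import InfoExtractor

    guess = InfoExtractor._guess_encoding_from_content
    assert guess('text/html; charset=iso-8859-2', b'') == 'iso-8859-2'
    assert guess('', b'<meta charset="windows-1250">') == 'windows-1250'
    assert guess('', b'\xff\xfeh\x00i\x00') == 'utf-16'
    assert guess('', b'plain bytes') == 'utf-8'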
- def __check_blocked(self, content):
- first_block = content[:512]
- if ('<title>Access to this site is blocked</title>' in content
- and 'Websense' in first_block):
- msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
- blocked_iframe = self._html_search_regex(
- r'<iframe src="([^"]+)"', content,
- 'Websense information URL', default=None)
- if blocked_iframe:
- msg += ' Visit %s for more details' % blocked_iframe
- raise ExtractorError(msg, expected=True)
- if '<title>The URL you requested has been blocked</title>' in first_block:
- msg = (
- 'Access to this webpage has been blocked by Indian censorship. '
- 'Use a VPN or proxy server (with --proxy) to route around it.')
- block_msg = self._html_search_regex(
- r'</h1><p>(.*?)</p>',
- content, 'block message', default=None)
- if block_msg:
- msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
- raise ExtractorError(msg, expected=True)
- if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
- and 'blocklist.rkn.gov.ru' in content):
- raise ExtractorError(
- 'Access to this webpage has been blocked by decision of the Russian government. '
- 'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
- expected=True)
-
- def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
- content_type = urlh.headers.get('Content-Type', '')
- webpage_bytes = urlh.read()
- if prefix is not None:
- webpage_bytes = prefix + webpage_bytes
- if not encoding:
- encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
- if self._downloader.params.get('dump_intermediate_pages', False):
- self.to_screen('Dumping request to ' + urlh.geturl())
- dump = base64.b64encode(webpage_bytes).decode('ascii')
- self._downloader.to_screen(dump)
- if self._downloader.params.get('write_pages', False):
- basen = '%s_%s' % (video_id, urlh.geturl())
- if len(basen) > 240:
- h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
- basen = basen[:240 - len(h)] + h
- raw_filename = basen + '.dump'
- filename = sanitize_filename(raw_filename, restricted=True)
- self.to_screen('Saving request to ' + filename)
- # Working around MAX_PATH limitation on Windows (see
- # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
- if compat_os_name == 'nt':
- absfilepath = os.path.abspath(filename)
- if len(absfilepath) > 259:
- filename = '\\\\?\\' + absfilepath
- with open(filename, 'wb') as outf:
- outf.write(webpage_bytes)
-
- try:
- content = webpage_bytes.decode(encoding, 'replace')
- except LookupError:
- content = webpage_bytes.decode('utf-8', 'replace')
-
- self.__check_blocked(content)
-
- return content
-
- def _download_webpage(
- self, url_or_request, video_id, note=None, errnote=None,
- fatal=True, tries=1, timeout=5, encoding=None, data=None,
- headers={}, query={}, expected_status=None):
- """
- Return the data of the page as a string.
-
- Arguments:
- url_or_request -- plain text URL as a string or
- a compat_urllib_request.Request object
- video_id -- Video/playlist/item identifier (string)
-
- Keyword arguments:
- note -- note printed before downloading (string)
- errnote -- note printed in case of an error (string)
- fatal -- flag denoting whether error should be considered fatal,
- i.e. whether it should cause ExtractorError to be raised,
- otherwise a warning will be reported and extraction continued
- tries -- number of tries
- timeout -- sleep interval between tries
- encoding -- encoding for a page content decoding, guessed automatically
- when not explicitly specified
- data -- POST data (bytes)
- headers -- HTTP headers (dict)
- query -- URL query (dict)
- expected_status -- allows accepting failed HTTP requests (non-2xx
- status codes) by explicitly specifying a set of accepted status
- codes. Can be any of the following entities:
- - an integer type specifying an exact failed status code to
- accept
- - a list or a tuple of integer types specifying a list of
- failed status codes to accept
- - a callable accepting an actual failed status code and
- returning True if it should be accepted
- Note that this argument does not affect success status codes (2xx)
- which are always accepted.
- """
-
- success = False
- try_count = 0
- while success is False:
- try:
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- success = True
- except compat_http_client.IncompleteRead as e:
- try_count += 1
- if try_count >= tries:
- raise e
- self._sleep(timeout, video_id)
- if res is False:
- return res
- else:
- content, _ = res
- return content
-
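expected_status turns selected HTTP failures into readable responses instead of fatal errors; a hedged sketch of a subclass call (URL and ids hypothetical):

    # Inside a hypothetical extractor's _real_extract():
    # accept a 404 whose body still carries the error details.
    webpage = self._download_webpage(
        'https://api.example.com/videos/%s' % video_id, video_id,
        note='Downloading video metadata', expected_status=404)

    # A callable also works, e.g. accept any 4xx status:
    webpage = self._download_webpage(
        'https://api.example.com/videos/%s' % video_id, video_id,
        expected_status=lambda status: 400 <= status < 500)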
- def _download_xml_handle(
- self, url_or_request, video_id, note='Downloading XML',
- errnote='Unable to download XML', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (xml as a compat_etree_Element, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- xml_string, urlh = res
- return self._parse_xml(
- xml_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_xml(
- self, url_or_request, video_id,
- note='Downloading XML', errnote='Unable to download XML',
- transform_source=None, fatal=True, encoding=None,
- data=None, headers={}, query={}, expected_status=None):
- """
- Return the xml as a compat_etree_Element.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_xml_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
- def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
- if transform_source:
- xml_string = transform_source(xml_string)
- try:
- return compat_etree_fromstring(xml_string.encode('utf-8'))
- except compat_xml_parse_error as ve:
- errmsg = '%s: Failed to parse XML ' % video_id
- if fatal:
- raise ExtractorError(errmsg, cause=ve)
- else:
- self.report_warning(errmsg + str(ve))
-
- def _download_json_handle(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return a tuple (JSON object, URL handle).
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_webpage_handle(
- url_or_request, video_id, note, errnote, fatal=fatal,
- encoding=encoding, data=data, headers=headers, query=query,
- expected_status=expected_status)
- if res is False:
- return res
- json_string, urlh = res
- return self._parse_json(
- json_string, video_id, transform_source=transform_source,
- fatal=fatal), urlh
-
- def _download_json(
- self, url_or_request, video_id, note='Downloading JSON metadata',
- errnote='Unable to download JSON metadata', transform_source=None,
- fatal=True, encoding=None, data=None, headers={}, query={},
- expected_status=None):
- """
- Return the JSON object as a dict.
-
- See _download_webpage docstring for arguments specification.
- """
- res = self._download_json_handle(
- url_or_request, video_id, note=note, errnote=errnote,
- transform_source=transform_source, fatal=fatal, encoding=encoding,
- data=data, headers=headers, query=query,
- expected_status=expected_status)
- return res if res is False else res[0]
-
- def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
- if transform_source:
- json_string = transform_source(json_string)
- try:
- return json.loads(json_string)
- except ValueError as ve:
- errmsg = '%s: Failed to parse JSON ' % video_id
- if fatal:
- raise ExtractorError(errmsg, cause=ve)
- else:
- self.report_warning(errmsg + str(ve))
-
- def report_warning(self, msg, video_id=None):
- idstr = '' if video_id is None else '%s: ' % video_id
- self._downloader.report_warning(
- '[%s] %s%s' % (self.IE_NAME, idstr, msg))
-
- def to_screen(self, msg):
- """Print msg to screen, prefixing it with '[ie_name]'"""
- self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
-
- def report_extraction(self, id_or_name):
- """Report information extraction."""
- self.to_screen('%s: Extracting information' % id_or_name)
-
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- self.to_screen('%s: Downloading webpage' % video_id)
-
- def report_age_confirmation(self):
- """Report attempt to confirm age."""
- self.to_screen('Confirming age')
-
- def report_login(self):
- """Report attempt to log in."""
- self.to_screen('Logging in')
-
- @staticmethod
- def raise_login_required(msg='This video is only available for registered users'):
- raise ExtractorError(
- '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
- expected=True)
-
- @staticmethod
- def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
- raise GeoRestrictedError(msg, countries=countries)
-
- # Methods for following #608
- @staticmethod
- def url_result(url, ie=None, video_id=None, video_title=None):
- """Returns a URL that points to a page that should be processed"""
- # TODO: ie should be the class used for getting the info
- video_info = {'_type': 'url',
- 'url': url,
- 'ie_key': ie}
- if video_id is not None:
- video_info['id'] = video_id
- if video_title is not None:
- video_info['title'] = video_title
- return video_info
-
- def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
- urls = orderedSet(
- self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
- for m in matches)
- return self.playlist_result(
- urls, playlist_id=playlist_id, playlist_title=playlist_title)
-
- @staticmethod
- def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
- """Returns a playlist"""
- video_info = {'_type': 'playlist',
- 'entries': entries}
- if playlist_id:
- video_info['id'] = playlist_id
- if playlist_title:
- video_info['title'] = playlist_title
- if playlist_description:
- video_info['description'] = playlist_description
- return video_info
-
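url_result and playlist_result combine naturally: wrap each follow-up URL and hand the list back. A sketch (IDs and URLs hypothetical):

    entries = [
        self.url_result('https://example.com/v/1', video_id='1'),
        self.url_result('https://example.com/v/2', video_id='2'),
    ]
    return self.playlist_result(
        entries, playlist_id='example-feed', playlist_title='Example feed')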
- def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
- """
- Perform a regex search on the given string, using a single pattern or a
- list of patterns, returning the first matching group.
- In case of failure return a default value, report a warning or raise a
- RegexNotFoundError, depending on default and fatal, specifying the field name.
- """
- if isinstance(pattern, (str, compat_str, compiled_regex_type)):
- mobj = re.search(pattern, string, flags)
- else:
- for p in pattern:
- mobj = re.search(p, string, flags)
- if mobj:
- break
-
- if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
- _name = '\033[0;34m%s\033[0m' % name
- else:
- _name = name
-
- if mobj:
- if group is None:
- # return the first matching group
- return next(g for g in mobj.groups() if g is not None)
- else:
- return mobj.group(group)
- elif default is not NO_DEFAULT:
- return default
- elif fatal:
- raise RegexNotFoundError('Unable to extract %s' % _name)
- else:
- self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
- return None
-
- def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
- """
- Like _search_regex, but strips HTML tags and unescapes entities.
- """
- res = self._search_regex(pattern, string, name, default, fatal, flags, group)
- if res:
- return clean_html(res).strip()
- else:
- return res
-
- def _get_netrc_login_info(self, netrc_machine=None):
- username = None
- password = None
- netrc_machine = netrc_machine or self._NETRC_MACHINE
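- # A matching entry in ~/.netrc looks like (illustration):
- #   machine <netrc_machine> login myuser password mypass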
-
- if self._downloader.params.get('usenetrc', False):
- try:
- info = netrc.netrc().authenticators(netrc_machine)
- if info is not None:
- username = info[0]
- password = info[2]
- else:
- raise netrc.NetrcParseError(
- 'No authenticators for %s' % netrc_machine)
- except (IOError, netrc.NetrcParseError) as err:
- self._downloader.report_warning(
- 'parsing .netrc: %s' % error_to_compat_str(err))
-
- return username, password
-
- def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
- """
- Get the login info as (username, password)
- First look for the manually specified credentials using username_option
- and password_option as keys in params dictionary. If no such credentials
- available look in the netrc file using the netrc_machine or _NETRC_MACHINE
- value.
- If there's no info available, return (None, None)
- """
- if self._downloader is None:
- return (None, None)
-
- downloader_params = self._downloader.params
-
- # Attempt to use provided username and password or .netrc data
- if downloader_params.get(username_option) is not None:
- username = downloader_params[username_option]
- password = downloader_params[password_option]
- else:
- username, password = self._get_netrc_login_info(netrc_machine)
-
- return username, password
-
- def _get_tfa_info(self, note='two-factor verification code'):
- """
- Get the two-factor authentication info
- TODO - asking the user will be required for sms/phone verify
- currently just uses the command line option
- If there's no info available, return None
- """
- if self._downloader is None:
- return None
- downloader_params = self._downloader.params
-
- if downloader_params.get('twofactor') is not None:
- return downloader_params['twofactor']
-
- return compat_getpass('Type %s and press [Return]: ' % note)
-
- # Helper functions for extracting OpenGraph info
- @staticmethod
- def _og_regexes(prop):
- content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
- property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
- % {'prop': re.escape(prop)})
- template = r'<meta[^>]+?%s[^>]+?%s'
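- # Both attribute orders occur in the wild, so two templates are built,
- # matching e.g. (illustration):
- #   <meta property="og:title" content="Video title">
- #   <meta content="Video title" property="og:title">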
- return [
- template % (property_re, content_re),
- template % (content_re, property_re),
- ]
-
- @staticmethod
- def _meta_regex(prop):
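- # Matches e.g. <meta name="description" content="..."> with the name
- # and content attributes in either order (illustration)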
- return r'''(?isx)<meta
- (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
- [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
-
- def _og_search_property(self, prop, html, name=None, **kargs):
- if not isinstance(prop, (list, tuple)):
- prop = [prop]
- if name is None:
- name = 'OpenGraph %s' % prop[0]
- og_regexes = []
- for p in prop:
- og_regexes.extend(self._og_regexes(p))
- escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
- if escaped is None:
- return None
- return unescapeHTML(escaped)
-
- def _og_search_thumbnail(self, html, **kargs):
- return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
-
- def _og_search_description(self, html, **kargs):
- return self._og_search_property('description', html, fatal=False, **kargs)
-
- def _og_search_title(self, html, **kargs):
- return self._og_search_property('title', html, **kargs)
-
- def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
- regexes = self._og_regexes('video') + self._og_regexes('video:url')
- if secure:
- regexes = self._og_regexes('video:secure_url') + regexes
- return self._html_search_regex(regexes, html, name, **kargs)
-
- def _og_search_url(self, html, **kargs):
- return self._og_search_property('url', html, **kargs)
-
- def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
- if not isinstance(name, (list, tuple)):
- name = [name]
- if display_name is None:
- display_name = name[0]
- return self._html_search_regex(
- [self._meta_regex(n) for n in name],
- html, display_name, fatal=fatal, group='content', **kwargs)
-
- def _dc_search_uploader(self, html):
- return self._html_search_meta('dc.creator', html, 'uploader')
-
- def _rta_search(self, html):
- # See http://www.rtalabel.org/index.php?content=howtofaq#single
- if re.search(r'(?ix)<meta\s+name="rating"\s+'
- r' content="RTA-5042-1996-1400-1577-RTA"',
- html):
- return 18
- return 0
-
- def _media_rating_search(self, html):
- # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
- rating = self._html_search_meta('rating', html)
-
- if not rating:
- return None
-
- RATING_TABLE = {
- 'safe for kids': 0,
- 'general': 8,
- '14 years': 14,
- 'mature': 17,
- 'restricted': 19,
- }
- return RATING_TABLE.get(rating.lower())
-
- def _family_friendly_search(self, html):
- # See http://schema.org/VideoObject
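- # E.g. <meta itemprop="isFamilyFriendly" content="true"> maps to 0
- # while content="false" maps to 18 (illustration)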
- family_friendly = self._html_search_meta(
- 'isFamilyFriendly', html, default=None)
-
- if not family_friendly:
- return None
-
- RATING_TABLE = {
- '1': 0,
- 'true': 0,
- '0': 18,
- 'false': 18,
- }
- return RATING_TABLE.get(family_friendly.lower())
-
- def _twitter_search_player(self, html):
- return self._html_search_meta('twitter:player', html,
- 'twitter card player')
-
- def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
- json_ld = self._search_regex(
- JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
- default = kwargs.get('default', NO_DEFAULT)
- if not json_ld:
- return default if default is not NO_DEFAULT else {}
- # JSON-LD may be malformed and thus `fatal` should be respected.
- # At the same time `default` may be passed that assumes `fatal=False`
- # for _search_regex. Let's simulate the same behavior here as well.
- fatal = kwargs.get('fatal', True) if default is NO_DEFAULT else False
- return self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
-
- def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
- if isinstance(json_ld, compat_str):
- json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
- if not json_ld:
- return {}
- info = {}
- if not isinstance(json_ld, (list, tuple, dict)):
- return info
- if isinstance(json_ld, dict):
- json_ld = [json_ld]
-
- INTERACTION_TYPE_MAP = {
- 'CommentAction': 'comment',
- 'AgreeAction': 'like',
- 'DisagreeAction': 'dislike',
- 'LikeAction': 'like',
- 'DislikeAction': 'dislike',
- 'ListenAction': 'view',
- 'WatchAction': 'view',
- 'ViewAction': 'view',
- }
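- # E.g. (illustration) an InteractionCounter entry with interactionType
- # 'http://schema.org/WatchAction' and userInteractionCount 123 yields
- # info['view_count'] = 123 via the extractor below.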
-
- def extract_interaction_statistic(e):
- interaction_statistic = e.get('interactionStatistic')
- if not isinstance(interaction_statistic, list):
- return
- for is_e in interaction_statistic:
- if not isinstance(is_e, dict):
- continue
- if is_e.get('@type') != 'InteractionCounter':
- continue
- interaction_type = is_e.get('interactionType')
- if not isinstance(interaction_type, compat_str):
- continue
- interaction_count = int_or_none(is_e.get('userInteractionCount'))
- if interaction_count is None:
- continue
- count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
- if not count_kind:
- continue
- count_key = '%s_count' % count_kind
- if info.get(count_key) is not None:
- continue
- info[count_key] = interaction_count
-
- def extract_video_object(e):
- assert e['@type'] == 'VideoObject'
- info.update({
- 'url': url_or_none(e.get('contentUrl')),
- 'title': unescapeHTML(e.get('name')),
- 'description': unescapeHTML(e.get('description')),
- 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
- 'duration': parse_duration(e.get('duration')),
- 'timestamp': unified_timestamp(e.get('uploadDate')),
- 'filesize': float_or_none(e.get('contentSize')),
- 'tbr': int_or_none(e.get('bitrate')),
- 'width': int_or_none(e.get('width')),
- 'height': int_or_none(e.get('height')),
- 'view_count': int_or_none(e.get('interactionCount')),
- })
- extract_interaction_statistic(e)
-
- for e in json_ld:
- if isinstance(e.get('@context'), compat_str) and re.match(r'^https?://schema\.org/?$', e.get('@context')):
- item_type = e.get('@type')
- if expected_type is not None and expected_type != item_type:
- return info
- if item_type in ('TVEpisode', 'Episode'):
- episode_name = unescapeHTML(e.get('name'))
- info.update({
- 'episode': episode_name,
- 'episode_number': int_or_none(e.get('episodeNumber')),
- 'description': unescapeHTML(e.get('description')),
- })
- if not info.get('title') and episode_name:
- info['title'] = episode_name
- part_of_season = e.get('partOfSeason')
- if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
- info.update({
- 'season': unescapeHTML(part_of_season.get('name')),
- 'season_number': int_or_none(part_of_season.get('seasonNumber')),
- })
- part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
- if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
- info['series'] = unescapeHTML(part_of_series.get('name'))
- elif item_type == 'Movie':
- info.update({
- 'title': unescapeHTML(e.get('name')),
- 'description': unescapeHTML(e.get('description')),
- 'duration': parse_duration(e.get('duration')),
- 'timestamp': unified_timestamp(e.get('dateCreated')),
- })
- elif item_type in ('Article', 'NewsArticle'):
- info.update({
- 'timestamp': parse_iso8601(e.get('datePublished')),
- 'title': unescapeHTML(e.get('headline')),
- 'description': unescapeHTML(e.get('articleBody')),
- })
- elif item_type == 'VideoObject':
- extract_video_object(e)
- continue
- video = e.get('video')
- if isinstance(video, dict) and video.get('@type') == 'VideoObject':
- extract_video_object(video)
- break
- return dict((k, v) for k, v in info.items() if v is not None)
-
- @staticmethod
- def _hidden_inputs(html):
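- # Strip HTML comments first so that commented-out inputs are ignored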
- html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
- hidden_inputs = {}
- for input in re.findall(r'(?i)(<input[^>]+>)', html):
- attrs = extract_attributes(input)
- if not attrs:
- continue
- if attrs.get('type') not in ('hidden', 'submit'):
- continue
- name = attrs.get('name') or attrs.get('id')
- value = attrs.get('value')
- if name and value is not None:
- hidden_inputs[name] = value
- return hidden_inputs
-
- def _form_hidden_inputs(self, form_id, html):
- form = self._search_regex(
- r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
- html, '%s form' % form_id, group='form')
- return self._hidden_inputs(form)
-
- def _sort_formats(self, formats, field_preference=None):
- if not formats:
- raise ExtractorError('No video formats found')
-
- for f in formats:
- # Automatically determine tbr when missing based on abr and vbr (improves
- # formats sorting in some cases)
- if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
- f['tbr'] = f['abr'] + f['vbr']
-
- def _formats_key(f):
- # TODO remove the following workaround
- from ..utils import determine_ext
- if not f.get('ext') and 'url' in f:
- f['ext'] = determine_ext(f['url'])
-
- if isinstance(field_preference, (list, tuple)):
- return tuple(
- f.get(field)
- if f.get(field) is not None
- else ('' if field == 'format_id' else -1)
- for field in field_preference)
-
- preference = f.get('preference')
- if preference is None:
- preference = 0
- if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
- preference -= 0.5
-
- protocol = f.get('protocol') or determine_protocol(f)
- proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
-
- if f.get('vcodec') == 'none': # audio only
- preference -= 50
- if self._downloader.params.get('prefer_free_formats'):
- ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
- else:
- ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
- ext_preference = 0
- try:
- audio_ext_preference = ORDER.index(f['ext'])
- except ValueError:
- audio_ext_preference = -1
- else:
- if f.get('acodec') == 'none': # video only
- preference -= 40
- if self._downloader.params.get('prefer_free_formats'):
- ORDER = ['flv', 'mp4', 'webm']
- else:
- ORDER = ['webm', 'flv', 'mp4']
- try:
- ext_preference = ORDER.index(f['ext'])
- except ValueError:
- ext_preference = -1
- audio_ext_preference = 0
-
- return (
- preference,
- f.get('language_preference') if f.get('language_preference') is not None else -1,
- f.get('quality') if f.get('quality') is not None else -1,
- f.get('tbr') if f.get('tbr') is not None else -1,
- f.get('filesize') if f.get('filesize') is not None else -1,
- f.get('vbr') if f.get('vbr') is not None else -1,
- f.get('height') if f.get('height') is not None else -1,
- f.get('width') if f.get('width') is not None else -1,
- proto_preference,
- ext_preference,
- f.get('abr') if f.get('abr') is not None else -1,
- audio_ext_preference,
- f.get('fps') if f.get('fps') is not None else -1,
- f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
- f.get('source_preference') if f.get('source_preference') is not None else -1,
- f.get('format_id') if f.get('format_id') is not None else '',
- )
- formats.sort(key=_formats_key)
-
- def _check_formats(self, formats, video_id):
- if formats:
- formats[:] = filter(
- lambda f: self._is_valid_url(
- f['url'], video_id,
- item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
- formats)
-
- @staticmethod
- def _remove_duplicate_formats(formats):
- format_urls = set()
- unique_formats = []
- for f in formats:
- if f['url'] not in format_urls:
- format_urls.add(f['url'])
- unique_formats.append(f)
- formats[:] = unique_formats
-
- def _is_valid_url(self, url, video_id, item='video', headers={}):
- url = self._proto_relative_url(url, scheme='http:')
- # For now assume non HTTP(S) URLs always valid
- if not (url.startswith('http://') or url.startswith('https://')):
- return True
- try:
- self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
- return True
- except ExtractorError:
- self.to_screen(
- '%s: %s URL is invalid, skipping' % (video_id, item))
- return False
-
- def http_scheme(self):
- """ Either "http:" or "https:", depending on the user's preferences """
- return (
- 'http:'
- if self._downloader.params.get('prefer_insecure', False)
- else 'https:')
-
- def _proto_relative_url(self, url, scheme=None):
- if url is None:
- return url
- if url.startswith('//'):
- if scheme is None:
- scheme = self.http_scheme()
- return scheme + url
- else:
- return url
-
- def _sleep(self, timeout, video_id, msg_template=None):
- if msg_template is None:
- msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
- msg = msg_template % {'video_id': video_id, 'timeout': timeout}
- self.to_screen(msg)
- time.sleep(timeout)
-
- def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
- transform_source=lambda s: fix_xml_ampersands(s).strip(),
- fatal=True, m3u8_id=None):
- manifest = self._download_xml(
- manifest_url, video_id, 'Downloading f4m manifest',
- 'Unable to download f4m manifest',
- # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
- # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
- transform_source=transform_source,
- fatal=fatal)
-
- if manifest is False:
- return []
-
- return self._parse_f4m_formats(
- manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
- transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
-
- def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
- transform_source=lambda s: fix_xml_ampersands(s).strip(),
- fatal=True, m3u8_id=None):
- if not isinstance(manifest, compat_etree_Element) and not fatal:
- return []
-
- # Currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy
- akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
- if akamai_pv is not None and ';' in akamai_pv.text:
- playerVerificationChallenge = akamai_pv.text.split(';')[0]
- if playerVerificationChallenge.strip() != '':
- return []
-
- formats = []
- manifest_version = '1.0'
- media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
- if not media_nodes:
- manifest_version = '2.0'
- media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
- # Remove unsupported DRM protected media from final formats
- # rendition (see https://github.com/ytdl-org/youtube-dl/issues/8573).
- media_nodes = remove_encrypted_media(media_nodes)
- if not media_nodes:
- return formats
-
- manifest_base_url = get_base_url(manifest)
-
- bootstrap_info = xpath_element(
- manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
- 'bootstrap info', default=None)
-
- vcodec = None
- mime_type = xpath_text(
- manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
- 'mime type', default=None)
- if mime_type and mime_type.startswith('audio/'):
- vcodec = 'none'
-
- for i, media_el in enumerate(media_nodes):
- tbr = int_or_none(media_el.attrib.get('bitrate'))
- width = int_or_none(media_el.attrib.get('width'))
- height = int_or_none(media_el.attrib.get('height'))
- format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
- # If <bootstrapInfo> is present, the specified f4m is a
- # stream-level manifest, and only set-level manifests may refer to
- # external resources. See section 11.4 and section 4 of F4M spec
- if bootstrap_info is None:
- media_url = None
- # @href is introduced in 2.0, see section 11.6 of F4M spec
- if manifest_version == '2.0':
- media_url = media_el.attrib.get('href')
- if media_url is None:
- media_url = media_el.attrib.get('url')
- if not media_url:
- continue
- manifest_url = (
- media_url if media_url.startswith('http://') or media_url.startswith('https://')
- else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
- # If media_url is itself a f4m manifest do the recursive extraction
- # since bitrates in parent manifest (this one) and media_url manifest
- # may differ leading to inability to resolve the format by requested
- # bitrate in f4m downloader
- ext = determine_ext(manifest_url)
- if ext == 'f4m':
- f4m_formats = self._extract_f4m_formats(
- manifest_url, video_id, preference=preference, f4m_id=f4m_id,
- transform_source=transform_source, fatal=fatal)
- # Sometimes stream-level manifest contains single media entry that
- # does not contain any quality metadata (e.g. http://matchtv.ru/#live-player).
- # At the same time parent's media entry in set-level manifest may
- # contain it. We will copy it from parent in such cases.
- if len(f4m_formats) == 1:
- f = f4m_formats[0]
- f.update({
- 'tbr': f.get('tbr') or tbr,
- 'width': f.get('width') or width,
- 'height': f.get('height') or height,
- 'format_id': f.get('format_id') if not tbr else format_id,
- 'vcodec': vcodec,
- })
- formats.extend(f4m_formats)
- continue
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4', preference=preference,
- m3u8_id=m3u8_id, fatal=fatal))
- continue
- formats.append({
- 'format_id': format_id,
- 'url': manifest_url,
- 'manifest_url': manifest_url,
- 'ext': 'flv' if bootstrap_info is not None else None,
- 'protocol': 'f4m',
- 'tbr': tbr,
- 'width': width,
- 'height': height,
- 'vcodec': vcodec,
- 'preference': preference,
- })
- return formats
-
- def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
- return {
- 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
- 'url': m3u8_url,
- 'ext': ext,
- 'protocol': 'm3u8',
- 'preference': preference - 100 if preference else -100,
- 'resolution': 'multiple',
- 'format_note': 'Quality selection URL',
- }
-
- def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
- entry_protocol='m3u8', preference=None,
- m3u8_id=None, note=None, errnote=None,
- fatal=True, live=False):
- res = self._download_webpage_handle(
- m3u8_url, video_id,
- note=note or 'Downloading m3u8 information',
- errnote=errnote or 'Failed to download m3u8 information',
- fatal=fatal)
-
- if res is False:
- return []
-
- m3u8_doc, urlh = res
- m3u8_url = urlh.geturl()
-
- return self._parse_m3u8_formats(
- m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
- preference=preference, m3u8_id=m3u8_id, live=live)
-
- def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
- entry_protocol='m3u8', preference=None,
- m3u8_id=None, live=False):
- if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
- return []
-
- if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
- return []
-
- formats = []
-
- format_url = lambda u: (
- u
- if re.match(r'^https?://', u)
- else compat_urlparse.urljoin(m3u8_url, u))
-
- # References:
- # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
- # 2. https://github.com/ytdl-org/youtube-dl/issues/12211
- # 3. https://github.com/ytdl-org/youtube-dl/issues/18923
-
- # We should try extracting formats only from master playlists [1, 4.3.4],
- # i.e. playlists that describe available qualities. On the other hand
- # media playlists [1, 4.3.3] should be returned as is since they contain
- # just the media without quality renditions.
- # Fortunately, a master playlist can be easily distinguished from a media
- # playlist based on the availability of particular tags. As per [1, 4.3.3, 4.3.4]
- # master playlist tags MUST NOT appear in a media playlist and vice versa.
- # As per [1, 4.3.3.1] the #EXT-X-TARGETDURATION tag is REQUIRED for every
- # media playlist and MUST NOT appear in a master playlist, thus we can
- # clearly detect a media playlist with this criterion.
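- # For illustration, a minimal master playlist looks like:
- #   #EXT-X-STREAM-INF:BANDWIDTH=1280000,RESOLUTION=1280x720
- #   http://example.com/hi.m3u8
- # whereas a media playlist lists the segments themselves:
- #   #EXT-X-TARGETDURATION:10
- #   #EXTINF:9.009,
- #   http://example.com/segment0.ts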
-
- if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
- return [{
- 'url': m3u8_url,
- 'format_id': m3u8_id,
- 'ext': ext,
- 'protocol': entry_protocol,
- 'preference': preference,
- }]
-
- groups = {}
- last_stream_inf = {}
-
- def extract_media(x_media_line):
- media = parse_m3u8_attributes(x_media_line)
- # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
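- # e.g. (illustration):
- #   #EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud",NAME="English",URI="audio.m3u8"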
- media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
- if not (media_type and group_id and name):
- return
- groups.setdefault(group_id, []).append(media)
- if media_type not in ('VIDEO', 'AUDIO'):
- return
- media_url = media.get('URI')
- if media_url:
- format_id = []
- for v in (m3u8_id, group_id, name):
- if v:
- format_id.append(v)
- f = {
- 'format_id': '-'.join(format_id),
- 'url': format_url(media_url),
- 'manifest_url': m3u8_url,
- 'language': media.get('LANGUAGE'),
- 'ext': ext,
- 'protocol': entry_protocol,
- 'preference': preference,
- }
- if media_type == 'AUDIO':
- f['vcodec'] = 'none'
- formats.append(f)
-
- def build_stream_name():
- # Although the specification does not mention the NAME attribute for the
- # EXT-X-STREAM-INF tag, it may still sometimes be present (see [1]
- # or the vidio test in TestInfoExtractor.test_parse_m3u8_formats)
- # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
- stream_name = last_stream_inf.get('NAME')
- if stream_name:
- return stream_name
- # If there is no NAME in EXT-X-STREAM-INF it will be obtained
- # from corresponding rendition group
- stream_group_id = last_stream_inf.get('VIDEO')
- if not stream_group_id:
- return
- stream_group = groups.get(stream_group_id)
- if not stream_group:
- return stream_group_id
- rendition = stream_group[0]
- return rendition.get('NAME') or stream_group_id
-
- # Parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have a
- # chance to detect video-only formats when EXT-X-STREAM-INF tags
- # precede EXT-X-MEDIA tags in an HLS manifest such as [3].
- for line in m3u8_doc.splitlines():
- if line.startswith('#EXT-X-MEDIA:'):
- extract_media(line)
-
- for line in m3u8_doc.splitlines():
- if line.startswith('#EXT-X-STREAM-INF:'):
- last_stream_inf = parse_m3u8_attributes(line)
- elif line.startswith('#') or not line.strip():
- continue
- else:
- tbr = float_or_none(
- last_stream_inf.get('AVERAGE-BANDWIDTH')
- or last_stream_inf.get('BANDWIDTH'), scale=1000)
- format_id = []
- if m3u8_id:
- format_id.append(m3u8_id)
- stream_name = build_stream_name()
- # Bandwidth of live streams may differ over time thus making
- # format_id unpredictable. So it's better to keep provided
- # format_id intact.
- if not live:
- format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
- manifest_url = format_url(line.strip())
- f = {
- 'format_id': '-'.join(format_id),
- 'url': manifest_url,
- 'manifest_url': m3u8_url,
- 'tbr': tbr,
- 'ext': ext,
- 'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
- 'protocol': entry_protocol,
- 'preference': preference,
- }
- resolution = last_stream_inf.get('RESOLUTION')
- if resolution:
- mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
- if mobj:
- f['width'] = int(mobj.group('width'))
- f['height'] = int(mobj.group('height'))
- # Unified Streaming Platform
- mobj = re.search(
- r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
- if mobj:
- abr, vbr = mobj.groups()
- abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
- f.update({
- 'vbr': vbr,
- 'abr': abr,
- })
- codecs = parse_codecs(last_stream_inf.get('CODECS'))
- f.update(codecs)
- audio_group_id = last_stream_inf.get('AUDIO')
- # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
- # references a rendition group MUST have a CODECS attribute.
- # However, this is not always respected, for example, [2]
- # contains EXT-X-STREAM-INF tag which references AUDIO
- # rendition group but does not have CODECS and despite
- # referencing an audio group it represents a complete
- # (with audio and video) format. So, for such cases we will
- # ignore references to rendition groups and treat them
- # as complete formats.
- if audio_group_id and codecs and f.get('vcodec') != 'none':
- audio_group = groups.get(audio_group_id)
- if audio_group and audio_group[0].get('URI'):
- # TODO: update acodec for audio only formats with
- # the same GROUP-ID
- f['acodec'] = 'none'
- formats.append(f)
- last_stream_inf = {}
- return formats
-
- @staticmethod
- def _xpath_ns(path, namespace=None):
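- # E.g. ('./head/meta', 'ns') -> './{ns}head/{ns}meta' (illustration)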
- if not namespace:
- return path
- out = []
- for c in path.split('/'):
- if not c or c == '.':
- out.append(c)
- else:
- out.append('{%s}%s' % (namespace, c))
- return '/'.join(out)
-
- def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
- smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
-
- if smil is False:
- assert not fatal
- return []
-
- namespace = self._parse_smil_namespace(smil)
-
- return self._parse_smil_formats(
- smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
-
- def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
- smil = self._download_smil(smil_url, video_id, fatal=fatal)
- if smil is False:
- return {}
- return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
-
- def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
- return self._download_xml(
- smil_url, video_id, 'Downloading SMIL file',
- 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
-
- def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
- namespace = self._parse_smil_namespace(smil)
-
- formats = self._parse_smil_formats(
- smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
- subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
-
- video_id = os.path.splitext(url_basename(smil_url))[0]
- title = None
- description = None
- upload_date = None
- for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
- name = meta.attrib.get('name')
- content = meta.attrib.get('content')
- if not name or not content:
- continue
- if not title and name == 'title':
- title = content
- elif not description and name in ('description', 'abstract'):
- description = content
- elif not upload_date and name == 'date':
- upload_date = unified_strdate(content)
-
- thumbnails = [{
- 'id': image.get('type'),
- 'url': image.get('src'),
- 'width': int_or_none(image.get('width')),
- 'height': int_or_none(image.get('height')),
- } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
-
- return {
- 'id': video_id,
- 'title': title or video_id,
- 'description': description,
- 'upload_date': upload_date,
- 'thumbnails': thumbnails,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- def _parse_smil_namespace(self, smil):
- return self._search_regex(
- r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
-
- def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
- base = smil_url
- for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
- b = meta.get('base') or meta.get('httpBase')
- if b:
- base = b
- break
-
- formats = []
- rtmp_count = 0
- http_count = 0
- m3u8_count = 0
-
- srcs = []
- media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
- for medium in media:
- src = medium.get('src')
- if not src or src in srcs:
- continue
- srcs.append(src)
-
- bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
- filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
- width = int_or_none(medium.get('width'))
- height = int_or_none(medium.get('height'))
- proto = medium.get('proto')
- ext = medium.get('ext')
- src_ext = determine_ext(src)
- streamer = medium.get('streamer') or base
-
- if proto == 'rtmp' or streamer.startswith('rtmp'):
- rtmp_count += 1
- formats.append({
- 'url': streamer,
- 'play_path': src,
- 'ext': 'flv',
- 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
- 'tbr': bitrate,
- 'filesize': filesize,
- 'width': width,
- 'height': height,
- })
- if transform_rtmp_url:
- streamer, src = transform_rtmp_url(streamer, src)
- formats[-1].update({
- 'url': streamer,
- 'play_path': src,
- })
- continue
-
- src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
- src_url = src_url.strip()
-
- if proto == 'm3u8' or src_ext == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
- if len(m3u8_formats) == 1:
- m3u8_count += 1
- m3u8_formats[0].update({
- 'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
- 'tbr': bitrate,
- 'width': width,
- 'height': height,
- })
- formats.extend(m3u8_formats)
- elif src_ext == 'f4m':
- f4m_url = src_url
- if not f4m_params:
- f4m_params = {
- 'hdcore': '3.2.0',
- 'plugin': 'flowplayer-3.2.0.1',
- }
- f4m_url += '&' if '?' in f4m_url else '?'
- f4m_url += compat_urllib_parse_urlencode(f4m_params)
- formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
- elif src_ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- src_url, video_id, mpd_id='dash', fatal=False))
- elif re.search(r'\.ism/[Mm]anifest', src_url):
- formats.extend(self._extract_ism_formats(
- src_url, video_id, ism_id='mss', fatal=False))
- elif src_url.startswith('http') and self._is_valid_url(src_url, video_id):
- http_count += 1
- formats.append({
- 'url': src_url,
- 'ext': ext or src_ext or 'flv',
- 'format_id': 'http-%d' % (bitrate or http_count),
- 'tbr': bitrate,
- 'filesize': filesize,
- 'width': width,
- 'height': height,
- })
-
- return formats
-
- def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
- urls = []
- subtitles = {}
- for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
- src = textstream.get('src')
- if not src or src in urls:
- continue
- urls.append(src)
- ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
- lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
- subtitles.setdefault(lang, []).append({
- 'url': src,
- 'ext': ext,
- })
- return subtitles
-
- def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
- xspf = self._download_xml(
- xspf_url, playlist_id, 'Downloading xspf playlist',
- 'Unable to download xspf manifest', fatal=fatal)
- if xspf is False:
- return []
- return self._parse_xspf(
- xspf, playlist_id, xspf_url=xspf_url,
- xspf_base_url=base_url(xspf_url))
-
- def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
- NS_MAP = {
- 'xspf': 'http://xspf.org/ns/0/',
- 's1': 'http://static.streamone.nl/player/ns/0',
- }
-
- entries = []
- for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
- title = xpath_text(
- track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
- description = xpath_text(
- track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
- thumbnail = xpath_text(
- track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
- duration = float_or_none(
- xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
-
- formats = []
- for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
- format_url = urljoin(xspf_base_url, location.text)
- if not format_url:
- continue
- formats.append({
- 'url': format_url,
- 'manifest_url': xspf_url,
- 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
- 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
- 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
- })
- self._sort_formats(formats)
-
- entries.append({
- 'id': playlist_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- })
- return entries
-
- def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
- res = self._download_xml_handle(
- mpd_url, video_id,
- note=note or 'Downloading MPD manifest',
- errnote=errnote or 'Failed to download MPD manifest',
- fatal=fatal)
- if res is False:
- return []
- mpd_doc, urlh = res
- if mpd_doc is None:
- return []
- mpd_base_url = base_url(urlh.geturl())
-
- return self._parse_mpd_formats(
- mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
- formats_dict=formats_dict, mpd_url=mpd_url)
-
- def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
- """
- Parse formats from MPD manifest.
- References:
- 1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
- http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
- 2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
- """
- if mpd_doc.get('type') == 'dynamic':
- return []
-
- namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
-
- def _add_ns(path):
- return self._xpath_ns(path, namespace)
-
- def is_drm_protected(element):
- return element.find(_add_ns('ContentProtection')) is not None
-
- def extract_multisegment_info(element, ms_parent_info):
- ms_info = ms_parent_info.copy()
-
- # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
- # common attributes and elements. We will only extract those relevant
- # for us.
- def extract_common(source):
- segment_timeline = source.find(_add_ns('SegmentTimeline'))
- if segment_timeline is not None:
- s_e = segment_timeline.findall(_add_ns('S'))
- if s_e:
- ms_info['total_number'] = 0
- ms_info['s'] = []
- for s in s_e:
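- # @r is the repeat count: the S element describes 1 + r contiguous
- # segments of duration @d (illustration: d=2, r=2 -> three segments)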
- r = int(s.get('r', 0))
- ms_info['total_number'] += 1 + r
- ms_info['s'].append({
- 't': int(s.get('t', 0)),
- # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
- 'd': int(s.attrib['d']),
- 'r': r,
- })
- start_number = source.get('startNumber')
- if start_number:
- ms_info['start_number'] = int(start_number)
- timescale = source.get('timescale')
- if timescale:
- ms_info['timescale'] = int(timescale)
- segment_duration = source.get('duration')
- if segment_duration:
- ms_info['segment_duration'] = float(segment_duration)
-
- def extract_Initialization(source):
- initialization = source.find(_add_ns('Initialization'))
- if initialization is not None:
- ms_info['initialization_url'] = initialization.attrib['sourceURL']
-
- segment_list = element.find(_add_ns('SegmentList'))
- if segment_list is not None:
- extract_common(segment_list)
- extract_Initialization(segment_list)
- segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
- if segment_urls_e:
- ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
- else:
- segment_template = element.find(_add_ns('SegmentTemplate'))
- if segment_template is not None:
- extract_common(segment_template)
- media = segment_template.get('media')
- if media:
- ms_info['media'] = media
- initialization = segment_template.get('initialization')
- if initialization:
- ms_info['initialization'] = initialization
- else:
- extract_Initialization(segment_template)
- return ms_info
-
- mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
- formats = []
- for period in mpd_doc.findall(_add_ns('Period')):
- period_duration = parse_duration(period.get('duration')) or mpd_duration
- period_ms_info = extract_multisegment_info(period, {
- 'start_number': 1,
- 'timescale': 1,
- })
- for adaptation_set in period.findall(_add_ns('AdaptationSet')):
- if is_drm_protected(adaptation_set):
- continue
- adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
- for representation in adaptation_set.findall(_add_ns('Representation')):
- if is_drm_protected(representation):
- continue
- representation_attrib = adaptation_set.attrib.copy()
- representation_attrib.update(representation.attrib)
- # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
- mime_type = representation_attrib['mimeType']
- content_type = mime_type.split('/')[0]
- if content_type == 'text':
- # TODO implement WebVTT downloading
- pass
- elif content_type in ('video', 'audio'):
- base_url = ''
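- # Resolve BaseURL by prepending each ancestor's BaseURL text
- # (Representation, AdaptationSet, Period, MPD) until an absolute
- # URL is formed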
- for element in (representation, adaptation_set, period, mpd_doc):
- base_url_e = element.find(_add_ns('BaseURL'))
- if base_url_e is not None:
- base_url = base_url_e.text + base_url
- if re.match(r'^https?://', base_url):
- break
- if mpd_base_url and not re.match(r'^https?://', base_url):
- if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
- mpd_base_url += '/'
- base_url = mpd_base_url + base_url
- representation_id = representation_attrib.get('id')
- lang = representation_attrib.get('lang')
- url_el = representation.find(_add_ns('BaseURL'))
- filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
- bandwidth = int_or_none(representation_attrib.get('bandwidth'))
- f = {
- 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
- 'manifest_url': mpd_url,
- 'ext': mimetype2ext(mime_type),
- 'width': int_or_none(representation_attrib.get('width')),
- 'height': int_or_none(representation_attrib.get('height')),
- 'tbr': float_or_none(bandwidth, 1000),
- 'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
- 'fps': int_or_none(representation_attrib.get('frameRate')),
- 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
- 'format_note': 'DASH %s' % content_type,
- 'filesize': filesize,
- 'container': mimetype2ext(mime_type) + '_dash',
- }
- f.update(parse_codecs(representation_attrib.get('codecs')))
- representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
-
- def prepare_template(template_name, identifiers):
- tmpl = representation_ms_info[template_name]
- # First off, % characters outside $...$ templates
- # must be escaped by doubling for proper processing
- # by % operator string formatting used further (see
- # https://github.com/ytdl-org/youtube-dl/issues/16867).
- t = ''
- in_template = False
- for c in tmpl:
- t += c
- if c == '$':
- in_template = not in_template
- elif c == '%' and not in_template:
- t += c
- # Next, $...$ templates are translated to their
- # %(...) counterparts to be used with % operator
- t = t.replace('$RepresentationID$', representation_id)
- t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
- t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
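- # E.g. (illustration) 'seg-$Number%05d$.m4s' becomes 'seg-%(Number)05d.m4s',
- # later filled in as t % {'Number': segment_number}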
- t = t.replace('$$', '$')
- return t
-
- # @initialization is a regular template like @media one
- # so it should be handled just the same way (see
- # https://github.com/ytdl-org/youtube-dl/issues/11605)
- if 'initialization' in representation_ms_info:
- initialization_template = prepare_template(
- 'initialization',
- # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
- # $Time$ shall not be included for @initialization thus
- # only $Bandwidth$ remains
- ('Bandwidth', ))
- representation_ms_info['initialization_url'] = initialization_template % {
- 'Bandwidth': bandwidth,
- }
-
- def location_key(location):
- return 'url' if re.match(r'^https?://', location) else 'path'
-
- if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
-
- media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
- media_location_key = location_key(media_template)
-
- # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
- # can't be used at the same time
- if '%(Number' in media_template and 's' not in representation_ms_info:
- segment_duration = None
- if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
- segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
- representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
- representation_ms_info['fragments'] = [{
- media_location_key: media_template % {
- 'Number': segment_number,
- 'Bandwidth': bandwidth,
- },
- 'duration': segment_duration,
- } for segment_number in range(
- representation_ms_info['start_number'],
- representation_ms_info['total_number'] + representation_ms_info['start_number'])]
- else:
- # $Number*$ or $Time$ in media template with S list available
- # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
- # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
- representation_ms_info['fragments'] = []
- segment_time = 0
- segment_d = None
- segment_number = representation_ms_info['start_number']
-
- def add_segment_url():
- segment_url = media_template % {
- 'Time': segment_time,
- 'Bandwidth': bandwidth,
- 'Number': segment_number,
- }
- representation_ms_info['fragments'].append({
- media_location_key: segment_url,
- 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
- })
-
- for num, s in enumerate(representation_ms_info['s']):
- segment_time = s.get('t') or segment_time
- segment_d = s['d']
- add_segment_url()
- segment_number += 1
- for r in range(s.get('r', 0)):
- segment_time += segment_d
- add_segment_url()
- segment_number += 1
- segment_time += segment_d
- elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
- # No media template
- # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
- # or any YouTube dashsegments video
- fragments = []
- segment_index = 0
- timescale = representation_ms_info['timescale']
- for s in representation_ms_info['s']:
- duration = float_or_none(s['d'], timescale)
- for r in range(s.get('r', 0) + 1):
- segment_uri = representation_ms_info['segment_urls'][segment_index]
- fragments.append({
- location_key(segment_uri): segment_uri,
- 'duration': duration,
- })
- segment_index += 1
- representation_ms_info['fragments'] = fragments
- elif 'segment_urls' in representation_ms_info:
- # Segment URLs with no SegmentTimeline
- # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
- # https://github.com/ytdl-org/youtube-dl/pull/14844
- fragments = []
- segment_duration = float_or_none(
- representation_ms_info['segment_duration'],
- representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
- for segment_url in representation_ms_info['segment_urls']:
- fragment = {
- location_key(segment_url): segment_url,
- }
- if segment_duration:
- fragment['duration'] = segment_duration
- fragments.append(fragment)
- representation_ms_info['fragments'] = fragments
- # If there is a fragments key available then we correctly recognized fragmented media.
- # Otherwise we will assume unfragmented media with direct access. Technically, such
- # assumption is not necessarily correct since we may simply have no support for
- # some forms of fragmented media renditions yet, but for now we'll use this fallback.
- if 'fragments' in representation_ms_info:
- f.update({
- # NB: mpd_url may be empty when MPD manifest is parsed from a string
- 'url': mpd_url or base_url,
- 'fragment_base_url': base_url,
- 'fragments': [],
- 'protocol': 'http_dash_segments',
- })
- if 'initialization_url' in representation_ms_info:
- initialization_url = representation_ms_info['initialization_url']
- if not f.get('url'):
- f['url'] = initialization_url
- f['fragments'].append({location_key(initialization_url): initialization_url})
- f['fragments'].extend(representation_ms_info['fragments'])
- else:
- # Assuming direct URL to unfragmented media.
- f['url'] = base_url
-
- # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
- # is not necessarily unique within a Period thus formats with
- # the same `format_id` are quite possible. There are numerous examples
- # of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
- # https://github.com/ytdl-org/youtube-dl/issues/13919)
- full_info = formats_dict.get(representation_id, {}).copy()
- full_info.update(f)
- formats.append(full_info)
- else:
- self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
- return formats
-
- def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
- res = self._download_xml_handle(
- ism_url, video_id,
- note=note or 'Downloading ISM manifest',
- errnote=errnote or 'Failed to download ISM manifest',
- fatal=fatal)
- if res is False:
- return []
- ism_doc, urlh = res
-
- return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
-
- def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
- """
- Parse formats from ISM manifest.
- References:
- 1. [MS-SSTR]: Smooth Streaming Protocol,
- https://msdn.microsoft.com/en-us/library/ff469518.aspx
- """
- if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
- return []
-
- duration = int(ism_doc.attrib['Duration'])
- timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
-
- formats = []
- for stream in ism_doc.findall('StreamIndex'):
- stream_type = stream.get('Type')
- if stream_type not in ('video', 'audio'):
- continue
- url_pattern = stream.attrib['Url']
- stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
- stream_name = stream.get('Name')
- for track in stream.findall('QualityLevel'):
- fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
- # TODO: add support for WVC1 and WMAP
- if fourcc not in ('H264', 'AVC1', 'AACL'):
- self.report_warning('%s is not a supported codec' % fourcc)
- continue
- tbr = int(track.attrib['Bitrate']) // 1000
- # [1] does not mention Width and Height attributes. However,
- # they're often present while MaxWidth and MaxHeight are
- # missing, so should be used as fallbacks
- width = int_or_none(track.get('MaxWidth') or track.get('Width'))
- height = int_or_none(track.get('MaxHeight') or track.get('Height'))
- sampling_rate = int_or_none(track.get('SamplingRate'))
-
- track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
- track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
-
- fragments = []
- fragment_ctx = {
- 'time': 0,
- }
- stream_fragments = stream.findall('c')
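- # Each <c> element describes a run of fragments: @t start time,
- # @d duration and @r repeat count (as used here: d=20000000, r=2
- # yields two fragments of that duration; illustration)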
- for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
- fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
- fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
- fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
- if not fragment_ctx['duration']:
- try:
- next_fragment_time = int(stream_fragments[stream_fragment_index + 1].attrib['t'])
- except (IndexError, KeyError):
- next_fragment_time = duration
- fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
- for _ in range(fragment_repeat):
- fragments.append({
- 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
- 'duration': fragment_ctx['duration'] / stream_timescale,
- })
- fragment_ctx['time'] += fragment_ctx['duration']
-
- format_id = []
- if ism_id:
- format_id.append(ism_id)
- if stream_name:
- format_id.append(stream_name)
- format_id.append(compat_str(tbr))
-
- formats.append({
- 'format_id': '-'.join(format_id),
- 'url': ism_url,
- 'manifest_url': ism_url,
- 'ext': 'ismv' if stream_type == 'video' else 'isma',
- 'width': width,
- 'height': height,
- 'tbr': tbr,
- 'asr': sampling_rate,
- 'vcodec': 'none' if stream_type == 'audio' else fourcc,
- 'acodec': 'none' if stream_type == 'video' else fourcc,
- 'protocol': 'ism',
- 'fragments': fragments,
- '_download_params': {
- 'duration': duration,
- 'timescale': stream_timescale,
- 'width': width or 0,
- 'height': height or 0,
- 'fourcc': fourcc,
- 'codec_private_data': track.get('CodecPrivateData'),
- 'sampling_rate': sampling_rate,
- 'channels': int_or_none(track.get('Channels', 2)),
- 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
- 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
- },
- })
- return formats
-
- def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
- def absolute_url(item_url):
- return urljoin(base_url, item_url)
-
- def parse_content_type(content_type):
- if not content_type:
- return {}
- ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
- if ctr:
- mimetype, codecs = ctr.groups()
- f = parse_codecs(codecs)
- f['ext'] = mimetype2ext(mimetype)
- return f
- return {}
-
- def _media_formats(src, cur_media_type, type_info={}):
- full_url = absolute_url(src)
- ext = type_info.get('ext') or determine_ext(full_url)
- if ext == 'm3u8':
- is_plain_url = False
- formats = self._extract_m3u8_formats(
- full_url, video_id, ext='mp4',
- entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
- preference=preference, fatal=False)
- elif ext == 'mpd':
- is_plain_url = False
- formats = self._extract_mpd_formats(
- full_url, video_id, mpd_id=mpd_id, fatal=False)
- else:
- is_plain_url = True
- formats = [{
- 'url': full_url,
- 'vcodec': 'none' if cur_media_type == 'audio' else None,
- }]
- return is_plain_url, formats
-
- entries = []
- # amp-video and amp-audio are very similar to their HTML5 counterparts
- # so we will include them right here (see
- # https://www.ampproject.org/docs/reference/components/amp-video)
- media_tags = [(media_tag, media_type, '')
- for media_tag, media_type
- in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
- media_tags.extend(re.findall(
- # We only allow video|audio followed by a whitespace or '>'.
- # Allowing more characters may end up in significant slow down (see
- # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
- # http://www.porntrex.com/maps/videositemap.xml).
- r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
- for media_tag, media_type, media_content in media_tags:
- media_info = {
- 'formats': [],
- 'subtitles': {},
- }
- media_attributes = extract_attributes(media_tag)
- src = strip_or_none(media_attributes.get('src'))
- if src:
- _, formats = _media_formats(src, media_type)
- media_info['formats'].extend(formats)
- media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
- if media_content:
- for source_tag in re.findall(r'<source[^>]+>', media_content):
- s_attr = extract_attributes(source_tag)
- # data-video-src and data-src are non-standard but seen
- # several times in the wild
- src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
- if not src:
- continue
- f = parse_content_type(s_attr.get('type'))
- is_plain_url, formats = _media_formats(src, media_type, f)
- if is_plain_url:
- # width, height, res, label and title attributes are
- # all non-standard but seen several times in the wild
- labels = [
- s_attr.get(lbl)
- for lbl in ('label', 'title')
- if str_or_none(s_attr.get(lbl))
- ]
- width = int_or_none(s_attr.get('width'))
- height = (int_or_none(s_attr.get('height'))
- or int_or_none(s_attr.get('res')))
- if not width or not height:
- for lbl in labels:
- resolution = parse_resolution(lbl)
- if not resolution:
- continue
- width = width or resolution.get('width')
- height = height or resolution.get('height')
- for lbl in labels:
- tbr = parse_bitrate(lbl)
- if tbr:
- break
- else:
- tbr = None
- f.update({
- 'width': width,
- 'height': height,
- 'tbr': tbr,
- 'format_id': s_attr.get('label') or s_attr.get('title'),
- })
- f.update(formats[0])
- media_info['formats'].append(f)
- else:
- media_info['formats'].extend(formats)
- for track_tag in re.findall(r'<track[^>]+>', media_content):
- track_attributes = extract_attributes(track_tag)
- kind = track_attributes.get('kind')
- if not kind or kind in ('subtitles', 'captions'):
- src = strip_or_none(track_attributes.get('src'))
- if not src:
- continue
- lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
- media_info['subtitles'].setdefault(lang, []).append({
- 'url': absolute_url(src),
- })
- for f in media_info['formats']:
- f.setdefault('http_headers', {})['Referer'] = base_url
- if media_info['formats'] or media_info['subtitles']:
- entries.append(media_info)
- return entries
-
- def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
- formats = []
- hdcore_sign = 'hdcore=3.7.0'
- f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
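- # E.g. (illustration) 'https://host/i/foo/master.m3u8' becomes
- # 'https://host/z/foo/manifest.f4m'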
- hds_host = hosts.get('hds')
- if hds_host:
- f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
- if 'hdcore=' not in f4m_url:
- f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
- f4m_formats = self._extract_f4m_formats(
- f4m_url, video_id, f4m_id='hds', fatal=False)
- for entry in f4m_formats:
- entry.update({'extra_param_to_segment_url': hdcore_sign})
- formats.extend(f4m_formats)
- m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
- hls_host = hosts.get('hls')
- if hls_host:
- m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- return formats
-
- def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
- query = compat_urlparse.urlparse(url).query
- url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
- mobj = re.search(
- r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
- url_base = mobj.group('url')
- http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
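- # E.g. (illustration) 'https://host/vod/smil:v.smil/playlist.m3u8' yields
- # url_base '//host/vod/smil:v.smil' and http_base_url 'https://host/vod/smil:v.smil'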
- formats = []
-
- def manifest_url(manifest):
- m_url = '%s/%s' % (http_base_url, manifest)
- if query:
- m_url += '?%s' % query
- return m_url
-
- if 'm3u8' not in skip_protocols:
- formats.extend(self._extract_m3u8_formats(
- manifest_url('playlist.m3u8'), video_id, 'mp4',
- m3u8_entry_protocol, m3u8_id='hls', fatal=False))
- if 'f4m' not in skip_protocols:
- formats.extend(self._extract_f4m_formats(
- manifest_url('manifest.f4m'),
- video_id, f4m_id='hds', fatal=False))
- if 'dash' not in skip_protocols:
- formats.extend(self._extract_mpd_formats(
- manifest_url('manifest.mpd'),
- video_id, mpd_id='dash', fatal=False))
- if re.search(r'(?:/smil:|\.smil)', url_base):
- if 'smil' not in skip_protocols:
- rtmp_formats = self._extract_smil_formats(
- manifest_url('jwplayer.smil'),
- video_id, fatal=False)
- for rtmp_format in rtmp_formats:
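- # Derive an RTSP variant of each RTMP format: append the play
- # path to the URL and swap the rtmp:// scheme for rtsp://.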
- rtsp_format = rtmp_format.copy()
- rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
- del rtsp_format['play_path']
- del rtsp_format['ext']
- rtsp_format.update({
- 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
- 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
- 'protocol': 'rtsp',
- })
- formats.extend([rtmp_format, rtsp_format])
- else:
- for protocol in ('rtmp', 'rtsp'):
- if protocol not in skip_protocols:
- formats.append({
- 'url': '%s:%s' % (protocol, url_base),
- 'format_id': protocol,
- 'protocol': protocol,
- })
- return formats
-
- def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
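- # Matches embeds like (illustrative markup):
- #   <script>jwplayer("myplayer").setup({"file": "...", ...});</script>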
- mobj = re.search(
- r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
- webpage)
- if mobj:
- try:
- jwplayer_data = self._parse_json(mobj.group('options'),
- video_id=video_id,
- transform_source=transform_source)
- except ExtractorError:
- pass
- else:
- if isinstance(jwplayer_data, dict):
- return jwplayer_data
-
- def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
- jwplayer_data = self._find_jwplayer_data(
- webpage, video_id, transform_source=js_to_json)
- return self._parse_jwplayer_data(
- jwplayer_data, video_id, *args, **kwargs)
-
- def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
- m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
- # JWPlayer backward compatibility: flattened playlists
- # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
- if 'playlist' not in jwplayer_data:
- jwplayer_data = {'playlist': [jwplayer_data]}
-
- entries = []
-
- # JWPlayer backward compatibility: single playlist item
- # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
- if not isinstance(jwplayer_data['playlist'], list):
- jwplayer_data['playlist'] = [jwplayer_data['playlist']]
-
- for video_data in jwplayer_data['playlist']:
- # JWPlayer backward compatibility: flattened sources
- # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
- if 'sources' not in video_data:
- video_data['sources'] = [video_data]
-
- this_video_id = video_id or video_data['mediaid']
-
- formats = self._parse_jwplayer_formats(
- video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
- mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
-
- subtitles = {}
- tracks = video_data.get('tracks')
- if tracks and isinstance(tracks, list):
- for track in tracks:
- if not isinstance(track, dict):
- continue
- track_kind = track.get('kind')
- if not track_kind or not isinstance(track_kind, compat_str):
- continue
- if track_kind.lower() not in ('captions', 'subtitles'):
- continue
- track_url = urljoin(base_url, track.get('file'))
- if not track_url:
- continue
- subtitles.setdefault(track.get('label') or 'en', []).append({
- 'url': self._proto_relative_url(track_url)
- })
-
- entry = {
- 'id': this_video_id,
- 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
- 'description': video_data.get('description'),
- 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
- 'timestamp': int_or_none(video_data.get('pubdate')),
- 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
- 'subtitles': subtitles,
- }
- # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
- if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
- entry.update({
- '_type': 'url_transparent',
- 'url': formats[0]['url'],
- })
- else:
- self._sort_formats(formats)
- entry['formats'] = formats
- entries.append(entry)
- if len(entries) == 1:
- return entries[0]
- else:
- return self.playlist_result(entries)
-
- def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
- m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
- urls = []
- formats = []
- for source in jwplayer_sources_data:
- if not isinstance(source, dict):
- continue
- source_url = urljoin(
- base_url, self._proto_relative_url(source.get('file')))
- if not source_url or source_url in urls:
- continue
- urls.append(source_url)
- source_type = source.get('type') or ''
- ext = mimetype2ext(source_type) or determine_ext(source_url)
- if source_type == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- source_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id=m3u8_id, fatal=False))
- elif source_type == 'dash' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- source_url, video_id, mpd_id=mpd_id, fatal=False))
- elif ext == 'smil':
- formats.extend(self._extract_smil_formats(
- source_url, video_id, fatal=False))
- # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
- elif source_type.startswith('audio') or ext in (
- 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
- formats.append({
- 'url': source_url,
- 'vcodec': 'none',
- 'ext': ext,
- })
- else:
- height = int_or_none(source.get('height'))
- if height is None:
- # Often no height is provided but there is a label in
- # a format like "1080p", "720p SD", or 1080.
- height = int_or_none(self._search_regex(
- r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
- 'height', default=None))
- a_format = {
- 'url': source_url,
- 'width': int_or_none(source.get('width')),
- 'height': height,
- 'tbr': int_or_none(source.get('bitrate')),
- 'ext': ext,
- }
- if source_url.startswith('rtmp'):
- a_format['ext'] = 'flv'
- # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
- # of jwplayer.flash.swf
- rtmp_url_parts = re.split(
- r'((?:mp4|mp3|flv):)', source_url, 1)
- if len(rtmp_url_parts) == 3:
- rtmp_url, prefix, play_path = rtmp_url_parts
- a_format.update({
- 'url': rtmp_url,
- 'play_path': prefix + play_path,
- })
- if rtmp_params:
- a_format.update(rtmp_params)
- formats.append(a_format)
- return formats
-
- def _live_title(self, name):
- """ Generate the title for a live video """
- now = datetime.datetime.now()
- now_str = now.strftime('%Y-%m-%d %H:%M')
- return name + ' ' + now_str
-
- def _int(self, v, name, fatal=False, **kwargs):
- res = int_or_none(v, **kwargs)
- if res is None:
- msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
- if fatal:
- raise ExtractorError(msg)
- else:
- self._downloader.report_warning(msg)
- return res
-
- def _float(self, v, name, fatal=False, **kwargs):
- res = float_or_none(v, **kwargs)
- if res is None:
- msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
- if fatal:
- raise ExtractorError(msg)
- else:
- self._downloader.report_warning(msg)
- return res
-
- def _set_cookie(self, domain, name, value, expire_time=None, port=None,
- path='/', secure=False, discard=False, rest={}, **kwargs):
- cookie = compat_cookiejar.Cookie(
- 0, name, value, port, port is not None, domain, True,
- domain.startswith('.'), path, True, secure, expire_time,
- discard, None, None, rest)
- self._downloader.cookiejar.set_cookie(cookie)
-
- def _get_cookies(self, url):
- """ Return a compat_cookies.SimpleCookie with the cookies for the url """
- req = sanitized_Request(url)
- self._downloader.cookiejar.add_cookie_header(req)
- return compat_cookies.SimpleCookie(req.get_header('Cookie'))
-
- def _apply_first_set_cookie_header(self, url_handle, cookie):
- """
- Apply first Set-Cookie header instead of the last. Experimental.
-
- Some sites (e.g. [1-3]) may serve two cookies under the same name
- in the Set-Cookie header and expect the first (old) one to be set
- rather than the second (new) one. However, per RFC 6265 the newer
- cookie is the one that ends up in the cookie store, which is what
- actually happens. We work around this issue by manually resetting
- the cookie to the first one.
- 1. https://new.vk.com/
- 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
- 3. https://learning.oreilly.com/
- """
- for header, cookies in url_handle.headers.items():
- if header.lower() != 'set-cookie':
- continue
- if sys.version_info[0] >= 3:
- cookies = cookies.encode('iso-8859-1')
- cookies = cookies.decode('utf-8')
- cookie_value = re.search(
- r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
- if cookie_value:
- value, domain = cookie_value.groups()
- self._set_cookie(domain, cookie, value)
- break
-
- def get_testcases(self, include_onlymatching=False):
- t = getattr(self, '_TEST', None)
- if t:
- assert not hasattr(self, '_TESTS'), \
- '%s has _TEST and _TESTS' % type(self).__name__
- tests = [t]
- else:
- tests = getattr(self, '_TESTS', [])
- for t in tests:
- if not include_onlymatching and t.get('only_matching', False):
- continue
- t['name'] = type(self).__name__[:-len('IE')]
- yield t
-
- def is_suitable(self, age_limit):
- """ Test whether the extractor is generally suitable for the given
- age limit (i.e. pornographic sites are not, all others usually are) """
-
- any_restricted = False
- for tc in self.get_testcases(include_onlymatching=False):
- if tc.get('playlist', []):
- tc = tc['playlist'][0]
- is_restricted = age_restricted(
- tc.get('info_dict', {}).get('age_limit'), age_limit)
- if not is_restricted:
- return True
- any_restricted = any_restricted or is_restricted
- return not any_restricted
-
- def extract_subtitles(self, *args, **kwargs):
- if (self._downloader.params.get('writesubtitles', False)
- or self._downloader.params.get('listsubtitles')):
- return self._get_subtitles(*args, **kwargs)
- return {}
-
- def _get_subtitles(self, *args, **kwargs):
- raise NotImplementedError('This method must be implemented by subclasses')
-
- @staticmethod
- def _merge_subtitle_items(subtitle_list1, subtitle_list2):
- """ Merge subtitle items for one language. Items with duplicated URLs
- will be dropped. """
- list1_urls = set([item['url'] for item in subtitle_list1])
- ret = list(subtitle_list1)
- ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
- return ret
-
- @classmethod
- def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
- """ Merge two subtitle dictionaries, language by language. """
- ret = dict(subtitle_dict1)
- for lang in subtitle_dict2:
- ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
- return ret
-
- def extract_automatic_captions(self, *args, **kwargs):
- if (self._downloader.params.get('writeautomaticsub', False)
- or self._downloader.params.get('listsubtitles')):
- return self._get_automatic_captions(*args, **kwargs)
- return {}
-
- def _get_automatic_captions(self, *args, **kwargs):
- raise NotImplementedError('This method must be implemented by subclasses')
-
- def mark_watched(self, *args, **kwargs):
- if (self._downloader.params.get('mark_watched', False)
- and (self._get_login_info()[0] is not None
- or self._downloader.params.get('cookiefile') is not None)):
- self._mark_watched(*args, **kwargs)
-
- def _mark_watched(self, *args, **kwargs):
- raise NotImplementedError('This method must be implemented by subclasses')
-
- def geo_verification_headers(self):
- headers = {}
- geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
- if geo_verification_proxy:
- headers['Ytdl-request-proxy'] = geo_verification_proxy
- return headers
-
- def _generic_id(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
-
- def _generic_title(self, url):
- return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
-
-
-class SearchInfoExtractor(InfoExtractor):
- """
- Base class for paged search query extractors.
- They accept URLs in the format _SEARCH_KEY(|all|[1-9][0-9]*):{query}
- Instances should define _SEARCH_KEY and _MAX_RESULTS.
- """
-
- @classmethod
- def _make_valid_url(cls):
- return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
-
- @classmethod
- def suitable(cls, url):
- return re.match(cls._make_valid_url(), url) is not None
-
- def _real_extract(self, query):
- mobj = re.match(self._make_valid_url(), query)
- if mobj is None:
- raise ExtractorError('Invalid search query "%s"' % query)
-
- prefix = mobj.group('prefix')
- query = mobj.group('query')
- if prefix == '':
- return self._get_n_results(query, 1)
- elif prefix == 'all':
- return self._get_n_results(query, self._MAX_RESULTS)
- else:
- n = int(prefix)
- if n <= 0:
- raise ExtractorError('Invalid download number %s for query "%s"' % (n, query))
- elif n > self._MAX_RESULTS:
- self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
- n = self._MAX_RESULTS
- return self._get_n_results(query, n)
-
- def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
- raise NotImplementedError('This method must be implemented by subclasses')
-
- @property
- def SEARCH_KEY(self):
- return self._SEARCH_KEY
diff --git a/youtube_dl/extractor/commonmistakes.py b/youtube_dl/extractor/commonmistakes.py
deleted file mode 100644
index 7e12499b1..000000000
--- a/youtube_dl/extractor/commonmistakes.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from __future__ import unicode_literals
-
-import sys
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class CommonMistakesIE(InfoExtractor):
- IE_DESC = False # Do not list
- _VALID_URL = r'''(?x)
- (?:url|URL)$
- '''
-
- _TESTS = [{
- 'url': 'url',
- 'only_matching': True,
- }, {
- 'url': 'URL',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- msg = (
- 'You\'ve asked youtube-dl to download the URL "%s". '
- 'That doesn\'t make any sense. '
- 'Simply remove the parameter in your command or configuration.'
- ) % url
- if not self._downloader.params.get('verbose'):
- msg += ' Add -v to the command line to see what arguments and configuration youtube-dl got.'
- raise ExtractorError(msg, expected=True)
-
-
-class UnicodeBOMIE(InfoExtractor):
- IE_DESC = False
- _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
-
- # Disable test for Python 3.0-3.3 since BOM handling is broken in re in these versions
- # (see https://github.com/ytdl-org/youtube-dl/issues/9751)
- _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
- 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- real_url = self._match_id(url)
- self.report_warning(
- 'Your URL starts with a Byte Order Mark (BOM). '
- 'Removing the BOM and looking for "%s" ...' % real_url)
- return self.url_result(real_url)
diff --git a/youtube_dl/extractor/corus.py b/youtube_dl/extractor/corus.py
deleted file mode 100644
index a1b251804..000000000
--- a/youtube_dl/extractor/corus.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .theplatform import ThePlatformFeedIE
-from ..utils import int_or_none
-
-
-class CorusIE(ThePlatformFeedIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?
- (?P<domain>
- (?:globaltv|etcanada)\.com|
- (?:hgtv|foodnetwork|slice|history|showcase|bigbrothercanada)\.ca
- )
- /(?:video/(?:[^/]+/)?|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))
- (?P<id>\d+)
- '''
- _TESTS = [{
- 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
- 'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
- 'info_dict': {
- 'id': '870923331648',
- 'ext': 'mp4',
- 'title': 'Movie Night Popcorn with Bryan',
- 'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
- 'uploader': 'SHWM-NEW',
- 'upload_date': '20170206',
- 'timestamp': 1486392197,
- },
- }, {
- 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
- 'only_matching': True,
- }, {
- 'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
- 'only_matching': True,
- }, {
- 'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
- 'only_matching': True,
- }, {
- 'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
- 'only_matching': True,
- }, {
- 'url': 'http://www.bigbrothercanada.ca/video/1457812035894/',
- 'only_matching': True
- }, {
- 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
- 'only_matching': True
- }]
-
- _TP_FEEDS = {
- 'globaltv': {
- 'feed_id': 'ChQqrem0lNUp',
- 'account_id': 2269680845,
- },
- 'etcanada': {
- 'feed_id': 'ChQqrem0lNUp',
- 'account_id': 2269680845,
- },
- 'hgtv': {
- 'feed_id': 'L0BMHXi2no43',
- 'account_id': 2414428465,
- },
- 'foodnetwork': {
- 'feed_id': 'ukK8o58zbRmJ',
- 'account_id': 2414429569,
- },
- 'slice': {
- 'feed_id': '5tUJLgV2YNJ5',
- 'account_id': 2414427935,
- },
- 'history': {
- 'feed_id': 'tQFx_TyyEq4J',
- 'account_id': 2369613659,
- },
- 'showcase': {
- 'feed_id': '9H6qyshBZU3E',
- 'account_id': 2414426607,
- },
- 'bigbrothercanada': {
- 'feed_id': 'ChQqrem0lNUp',
- 'account_id': 2269680845,
- },
- }
-
- def _real_extract(self, url):
- domain, video_id = re.match(self._VALID_URL, url).groups()
- feed_info = self._TP_FEEDS[domain.split('.')[0]]
- return self._extract_feed_info('dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id, lambda e: {
- 'episode_number': int_or_none(e.get('pl1$episode')),
- 'season_number': int_or_none(e.get('pl1$season')),
- 'series': e.get('pl1$show'),
- }, {
- 'HLS': {
- 'manifest': 'm3u',
- },
- 'DesktopHLS Default': {
- 'manifest': 'm3u',
- },
- 'MP4 MBR': {
- 'manifest': 'm3u',
- },
- }, feed_info['account_id'])
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
deleted file mode 100644
index 85a9a577f..000000000
--- a/youtube_dl/extractor/crunchyroll.py
+++ /dev/null
@@ -1,680 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import json
-import zlib
-
-from hashlib import sha1
-from math import pow, sqrt, floor
-from .common import InfoExtractor
-from .vrv import VRVIE
-from ..compat import (
- compat_b64decode,
- compat_etree_Element,
- compat_etree_fromstring,
- compat_urllib_parse_urlencode,
- compat_urllib_request,
- compat_urlparse,
-)
-from ..utils import (
- ExtractorError,
- bytes_to_intlist,
- extract_attributes,
- float_or_none,
- intlist_to_bytes,
- int_or_none,
- lowercase_escape,
- remove_end,
- sanitized_Request,
- unified_strdate,
- urlencode_postdata,
- xpath_text,
-)
-from ..aes import (
- aes_cbc_decrypt,
-)
-
-
-class CrunchyrollBaseIE(InfoExtractor):
- _LOGIN_URL = 'https://www.crunchyroll.com/login'
- _LOGIN_FORM = 'login_form'
- _NETRC_MACHINE = 'crunchyroll'
-
- def _call_rpc_api(self, method, video_id, note=None, data=None):
- data = data or {}
- data['req'] = 'RpcApi' + method
- data = compat_urllib_parse_urlencode(data).encode('utf-8')
- return self._download_xml(
- 'https://www.crunchyroll.com/xml/',
- video_id, note, fatal=False, data=data, headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login page')
-
- def is_logged(webpage):
- return 'href="/logout"' in webpage
-
- # Already logged in
- if is_logged(login_page):
- return
-
- login_form_str = self._search_regex(
- r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
- login_page, 'login form', group='form')
-
- post_url = extract_attributes(login_form_str).get('action')
- if not post_url:
- post_url = self._LOGIN_URL
- elif not post_url.startswith('http'):
- post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
-
- login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
-
- login_form.update({
- 'login_form[name]': username,
- 'login_form[password]': password,
- })
-
- response = self._download_webpage(
- post_url, None, 'Logging in', 'Wrong login info',
- data=urlencode_postdata(login_form),
- headers={'Content-Type': 'application/x-www-form-urlencoded'})
-
- # Successful login
- if is_logged(response):
- return
-
- error = self._html_search_regex(
- '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
- response, 'error message', default=None)
- if error:
- raise ExtractorError('Unable to login: %s' % error, expected=True)
-
- raise ExtractorError('Unable to log in')
-
- def _real_initialize(self):
- self._login()
-
- @staticmethod
- def _add_skip_wall(url):
- parsed_url = compat_urlparse.urlparse(url)
- qs = compat_urlparse.parse_qs(parsed_url.query)
- # Always force skip_wall to bypass the maturity wall, namely the 18+ confirmation message:
- # > This content may be inappropriate for some people.
- # > Are you sure you want to continue?
- # since it's not disabled by default in the Crunchyroll account settings.
- # See https://github.com/ytdl-org/youtube-dl/issues/7202.
- qs['skip_wall'] = ['1']
- return compat_urlparse.urlunparse(
- parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
-
-
-class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
- IE_NAME = 'crunchyroll'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
- _TESTS = [{
- 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
- 'info_dict': {
- 'id': '645513',
- 'ext': 'mp4',
- 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
- 'description': 'md5:2d17137920c64f2f49981a7797d275ef',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Yomiuri Telecasting Corporation (YTV)',
- 'upload_date': '20131013',
- 'url': 're:(?!.*&amp)',
- },
- 'params': {
- # rtmp
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
- 'info_dict': {
- 'id': '589804',
- 'ext': 'flv',
- 'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
- 'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Danny Choo Network',
- 'upload_date': '20120213',
- },
- 'params': {
- # rtmp
- 'skip_download': True,
- },
- 'skip': 'Video gone',
- }, {
- 'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
- 'info_dict': {
- 'id': '702409',
- 'ext': 'mp4',
- 'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
- 'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'TV TOKYO',
- 'upload_date': '20160508',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
- 'info_dict': {
- 'id': '727589',
- 'ext': 'mp4',
- 'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
- 'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Kadokawa Pictures Inc.',
- 'upload_date': '20170118',
- 'series': "KONOSUBA -God's blessing on this wonderful world!",
- 'season': "KONOSUBA -God's blessing on this wonderful world! 2",
- 'season_number': 2,
- 'episode': 'Give Me Deliverance From This Judicial Injustice!',
- 'episode_number': 1,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
- 'only_matching': True,
- }, {
- # geo-restricted (US), 18+ maturity wall, non-premium available
- 'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
- 'only_matching': True,
- }, {
- # A description with double quotes
- 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
- 'info_dict': {
- 'id': '535080',
- 'ext': 'mp4',
- 'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
- 'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
- 'uploader': 'Marvelous AQL Inc.',
- 'upload_date': '20091021',
- },
- 'params': {
- # Just test metadata extraction
- 'skip_download': True,
- },
- }, {
- # make sure we can extract an uploader name that's not a link
- 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
- 'info_dict': {
- 'id': '606899',
- 'ext': 'mp4',
- 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
- 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
- 'uploader': 'Geneon Entertainment',
- 'upload_date': '20120717',
- },
- 'params': {
- # just test metadata extraction
- 'skip_download': True,
- },
- }, {
- # A video with a vastly different season name compared to the series name
- 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
- 'info_dict': {
- 'id': '590532',
- 'ext': 'mp4',
- 'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
- 'description': 'Mahiro and Nyaruko talk about official certification.',
- 'uploader': 'TV TOKYO',
- 'upload_date': '20120305',
- 'series': 'Nyarko-san: Another Crawling Chaos',
- 'season': 'Haiyoru! Nyaruani (ONA)',
- },
- 'params': {
- # Just test metadata extraction
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.crunchyroll.com/media-723735',
- 'only_matching': True,
- }, {
- 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
- 'only_matching': True,
- }]
-
- _FORMAT_IDS = {
- '360': ('60', '106'),
- '480': ('61', '106'),
- '720': ('62', '106'),
- '1080': ('80', '108'),
- }
-
- def _download_webpage(self, url_or_request, *args, **kwargs):
- request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
- else sanitized_Request(url_or_request))
- # Accept-Language must be set explicitly to accept any language to avoid issues
- # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
- # Along with the IP address, Crunchyroll uses Accept-Language to guess whether georestriction
- # should be imposed (from what I can see it just takes the first language,
- # ignoring the priority, and requires it to correspond to the IP). This incidentally causes
- # Crunchyroll to fail in georestriction cases in browsers that don't place
- # the locale language first in the header. Allowing any language seems to work around the issue.
- request.add_header('Accept-Language', '*')
- return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
-
- def _decrypt_subtitles(self, data, iv, id):
- data = bytes_to_intlist(compat_b64decode(data))
- iv = bytes_to_intlist(compat_b64decode(iv))
- id = int(id)
-
- def obfuscate_key_aux(count, modulo, start):
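- # Appends `count` Fibonacci-style terms to `start`, drops the seed
- # values, and maps each term into a printable range via x % modulo + 33.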
- output = list(start)
- for _ in range(count):
- output.append(output[-1] + output[-2])
- # cut off start values
- output = output[2:]
- output = list(map(lambda x: x % modulo + 33, output))
- return output
-
- def obfuscate_key(key):
- num1 = int(floor(pow(2, 25) * sqrt(6.9)))
- num2 = (num1 ^ key) << 5
- num3 = key ^ num1
- num4 = num3 ^ (num3 >> 3) ^ num2
- prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
- shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
- # Extend 160 Bit hash to 256 Bit
- return shaHash + [0] * 12
-
- key = obfuscate_key(id)
-
- decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
- return zlib.decompress(decrypted_data)
-
- def _convert_subtitles_to_srt(self, sub_root):
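- # Emits standard SRT blocks, e.g. (illustrative):
- #   1
- #   00:00:01,000 --> 00:00:02,500
- #   Some subtitle text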
- output = ''
-
- for i, event in enumerate(sub_root.findall('./events/event'), 1):
- start = event.attrib['start'].replace('.', ',')
- end = event.attrib['end'].replace('.', ',')
- text = event.attrib['text'].replace('\\N', '\n')
- output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
- return output
-
- def _convert_subtitles_to_ass(self, sub_root):
- output = ''
-
- def ass_bool(strvalue):
- assvalue = '0'
- if strvalue == '1':
- assvalue = '-1'
- return assvalue
-
- output = '[Script Info]\n'
- output += 'Title: %s\n' % sub_root.attrib['title']
- output += 'ScriptType: v4.00+\n'
- output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
- output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
- output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
- output += """
-[V4+ Styles]
-Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
-"""
- for style in sub_root.findall('./styles/style'):
- output += 'Style: ' + style.attrib['name']
- output += ',' + style.attrib['font_name']
- output += ',' + style.attrib['font_size']
- output += ',' + style.attrib['primary_colour']
- output += ',' + style.attrib['secondary_colour']
- output += ',' + style.attrib['outline_colour']
- output += ',' + style.attrib['back_colour']
- output += ',' + ass_bool(style.attrib['bold'])
- output += ',' + ass_bool(style.attrib['italic'])
- output += ',' + ass_bool(style.attrib['underline'])
- output += ',' + ass_bool(style.attrib['strikeout'])
- output += ',' + style.attrib['scale_x']
- output += ',' + style.attrib['scale_y']
- output += ',' + style.attrib['spacing']
- output += ',' + style.attrib['angle']
- output += ',' + style.attrib['border_style']
- output += ',' + style.attrib['outline']
- output += ',' + style.attrib['shadow']
- output += ',' + style.attrib['alignment']
- output += ',' + style.attrib['margin_l']
- output += ',' + style.attrib['margin_r']
- output += ',' + style.attrib['margin_v']
- output += ',' + style.attrib['encoding']
- output += '\n'
-
- output += """
-[Events]
-Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
-"""
- for event in sub_root.findall('./events/event'):
- output += 'Dialogue: 0'
- output += ',' + event.attrib['start']
- output += ',' + event.attrib['end']
- output += ',' + event.attrib['style']
- output += ',' + event.attrib['name']
- output += ',' + event.attrib['margin_l']
- output += ',' + event.attrib['margin_r']
- output += ',' + event.attrib['margin_v']
- output += ',' + event.attrib['effect']
- output += ',' + event.attrib['text']
- output += '\n'
-
- return output
-
- def _extract_subtitles(self, subtitle):
- sub_root = compat_etree_fromstring(subtitle)
- return [{
- 'ext': 'srt',
- 'data': self._convert_subtitles_to_srt(sub_root),
- }, {
- 'ext': 'ass',
- 'data': self._convert_subtitles_to_ass(sub_root),
- }]
-
- def _get_subtitles(self, video_id, webpage):
- subtitles = {}
- for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
- sub_doc = self._call_rpc_api(
- 'Subtitle_GetXml', video_id,
- 'Downloading subtitles for ' + sub_name, data={
- 'subtitle_script_id': sub_id,
- })
- if not isinstance(sub_doc, compat_etree_Element):
- continue
- sid = sub_doc.get('id')
- iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
- data = xpath_text(sub_doc, 'data', 'subtitle data')
- if not sid or not iv or not data:
- continue
- subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
- lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
- if not lang_code:
- continue
- subtitles[lang_code] = self._extract_subtitles(subtitle)
- return subtitles
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('video_id')
-
- if mobj.group('prefix') == 'm':
- mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
- webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
- else:
- webpage_url = 'http://www.' + mobj.group('url')
-
- webpage = self._download_webpage(
- self._add_skip_wall(webpage_url), video_id,
- headers=self.geo_verification_headers())
- note_m = self._html_search_regex(
- r'<div class="showmedia-trailer-notice">(.+?)</div>',
- webpage, 'trailer-notice', default='')
- if note_m:
- raise ExtractorError(note_m)
-
- mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
- if mobj:
- msg = json.loads(mobj.group('msg'))
- if msg.get('type') == 'error':
- raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
-
- if 'To view this, please log in to verify you are 18 or older.' in webpage:
- self.raise_login_required()
-
- media = self._parse_json(self._search_regex(
- r'vilos\.config\.media\s*=\s*({.+?});',
- webpage, 'vilos media', default='{}'), video_id)
- media_metadata = media.get('metadata') or {}
-
- language = self._search_regex(
- r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
- webpage, 'language', default=None, group='lang')
-
- video_title = self._html_search_regex(
- r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
- webpage, 'video_title')
- video_title = re.sub(r' {2,}', ' ', video_title)
- video_description = (self._parse_json(self._html_search_regex(
- r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
- webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
- if video_description:
- video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
- video_upload_date = self._html_search_regex(
- [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
- webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
- if video_upload_date:
- video_upload_date = unified_strdate(video_upload_date)
- video_uploader = self._html_search_regex(
- # try looking for both an uploader that's a link and one that's not
- [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
- webpage, 'video_uploader', fatal=False)
-
- formats = []
- for stream in media.get('streams', []):
- audio_lang = stream.get('audio_lang')
- hardsub_lang = stream.get('hardsub_lang')
- vrv_formats = self._extract_vrv_formats(
- stream.get('url'), video_id, stream.get('format'),
- audio_lang, hardsub_lang)
- for f in vrv_formats:
- if not hardsub_lang:
- f['preference'] = 1
- language_preference = 0
- if audio_lang == language:
- language_preference += 1
- if hardsub_lang == language:
- language_preference += 1
- if language_preference:
- f['language_preference'] = language_preference
- formats.extend(vrv_formats)
- if not formats:
- available_fmts = []
- for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
- attrs = extract_attributes(a)
- href = attrs.get('href')
- if href and '/freetrial' in href:
- continue
- available_fmts.append(fmt)
- if not available_fmts:
- for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
- available_fmts = re.findall(p, webpage)
- if available_fmts:
- break
- if not available_fmts:
- available_fmts = self._FORMAT_IDS.keys()
- video_encode_ids = []
-
- for fmt in available_fmts:
- stream_quality, stream_format = self._FORMAT_IDS[fmt]
- video_format = fmt + 'p'
- stream_infos = []
- streamdata = self._call_rpc_api(
- 'VideoPlayer_GetStandardConfig', video_id,
- 'Downloading media info for %s' % video_format, data={
- 'media_id': video_id,
- 'video_format': stream_format,
- 'video_quality': stream_quality,
- 'current_page': url,
- })
- if isinstance(streamdata, compat_etree_Element):
- stream_info = streamdata.find('./{default}preload/stream_info')
- if stream_info is not None:
- stream_infos.append(stream_info)
- stream_info = self._call_rpc_api(
- 'VideoEncode_GetStreamInfo', video_id,
- 'Downloading stream info for %s' % video_format, data={
- 'media_id': video_id,
- 'video_format': stream_format,
- 'video_encode_quality': stream_quality,
- })
- if isinstance(stream_info, compat_etree_Element):
- stream_infos.append(stream_info)
- for stream_info in stream_infos:
- video_encode_id = xpath_text(stream_info, './video_encode_id')
- if video_encode_id in video_encode_ids:
- continue
- video_encode_ids.append(video_encode_id)
-
- video_file = xpath_text(stream_info, './file')
- if not video_file:
- continue
- if video_file.startswith('http'):
- formats.extend(self._extract_m3u8_formats(
- video_file, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- continue
-
- video_url = xpath_text(stream_info, './host')
- if not video_url:
- continue
- metadata = stream_info.find('./metadata')
- format_info = {
- 'format': video_format,
- 'height': int_or_none(xpath_text(metadata, './height')),
- 'width': int_or_none(xpath_text(metadata, './width')),
- }
-
- if '.fplive.net/' in video_url:
- video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
- parsed_video_url = compat_urlparse.urlparse(video_url)
- direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
- netloc='v.lvlt.crcdn.net',
- path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
- if self._is_valid_url(direct_video_url, video_id, video_format):
- format_info.update({
- 'format_id': 'http-' + video_format,
- 'url': direct_video_url,
- })
- formats.append(format_info)
- continue
-
- format_info.update({
- 'format_id': 'rtmp-' + video_format,
- 'url': video_url,
- 'play_path': video_file,
- 'ext': 'flv',
- })
- formats.append(format_info)
- self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
-
- metadata = self._call_rpc_api(
- 'VideoPlayer_GetMediaMetadata', video_id,
- note='Downloading media info', data={
- 'media_id': video_id,
- })
-
- subtitles = {}
- for subtitle in media.get('subtitles', []):
- subtitle_url = subtitle.get('url')
- if not subtitle_url:
- continue
- subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
- 'url': subtitle_url,
- 'ext': subtitle.get('format', 'ass'),
- })
- if not subtitles:
- subtitles = self.extract_subtitles(video_id, webpage)
-
- # The webpage provides more accurate data than series_title from the XML
- series = self._html_search_regex(
- r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
- webpage, 'series', fatal=False)
-
- season = episode = episode_number = duration = thumbnail = None
-
- if isinstance(metadata, compat_etree_Element):
- season = xpath_text(metadata, 'series_title')
- episode = xpath_text(metadata, 'episode_title')
- episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
- duration = float_or_none(media_metadata.get('duration'), 1000)
- thumbnail = xpath_text(metadata, 'episode_image_url')
-
- if not episode:
- episode = media_metadata.get('title')
- if not episode_number:
- episode_number = int_or_none(media_metadata.get('episode_number'))
- if not thumbnail:
- thumbnail = media_metadata.get('thumbnail', {}).get('url')
-
- season_number = int_or_none(self._search_regex(
- r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
- webpage, 'season number', default=None))
-
- return {
- 'id': video_id,
- 'title': video_title,
- 'description': video_description,
- 'duration': duration,
- 'thumbnail': thumbnail,
- 'uploader': video_uploader,
- 'upload_date': video_upload_date,
- 'series': series,
- 'season': season,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- 'subtitles': subtitles,
- 'formats': formats,
- }
-
-
-class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
- IE_NAME = 'crunchyroll:playlist'
- _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
-
- _TESTS = [{
- 'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
- 'info_dict': {
- 'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
- 'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
- },
- 'playlist_count': 13,
- }, {
- # geo-restricted (US), 18+ maturity wall, non-premium available
- 'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
- 'info_dict': {
- 'id': 'cosplay-complex-ova',
- 'title': 'Cosplay Complex OVA'
- },
- 'playlist_count': 3,
- 'skip': 'Georestricted',
- }, {
- # geo-restricted (US), 18+ maturity wall, non-premium available since 2015-11-14
- 'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- show_id = self._match_id(url)
-
- webpage = self._download_webpage(
- self._add_skip_wall(url), show_id,
- headers=self.geo_verification_headers())
- title = self._html_search_meta('name', webpage, default=None)
-
- episode_paths = re.findall(
- r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
- webpage)
- entries = [
- self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
- for ep_id, ep in episode_paths
- ]
- entries.reverse()
-
- return {
- '_type': 'playlist',
- 'id': show_id,
- 'title': title,
- 'entries': entries,
- }
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
deleted file mode 100644
index 745971900..000000000
--- a/youtube_dl/extractor/dailymotion.py
+++ /dev/null
@@ -1,512 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import base64
-import functools
-import hashlib
-import itertools
-import json
-import random
-import re
-import string
-
-from .common import InfoExtractor
-from ..compat import compat_struct_pack
-from ..utils import (
- determine_ext,
- error_to_compat_str,
- ExtractorError,
- int_or_none,
- mimetype2ext,
- OnDemandPagedList,
- parse_iso8601,
- sanitized_Request,
- str_to_int,
- try_get,
- unescapeHTML,
- update_url_query,
- url_or_none,
- urlencode_postdata,
-)
-
-
-class DailymotionBaseInfoExtractor(InfoExtractor):
- @staticmethod
- def _build_request(url):
- """Build a request with the family filter disabled"""
- request = sanitized_Request(url)
- request.add_header('Cookie', 'family_filter=off; ff=off')
- return request
-
- def _download_webpage_handle_no_ff(self, url, *args, **kwargs):
- request = self._build_request(url)
- return self._download_webpage_handle(request, *args, **kwargs)
-
- def _download_webpage_no_ff(self, url, *args, **kwargs):
- request = self._build_request(url)
- return self._download_webpage(request, *args, **kwargs)
-
-
-class DailymotionIE(DailymotionBaseInfoExtractor):
- _VALID_URL = r'''(?ix)
- https?://
- (?:
- (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
- (?:www\.)?lequipe\.fr/video
- )
- /(?P<id>[^/?_]+)
- '''
- IE_NAME = 'dailymotion'
-
- _FORMATS = [
- ('stream_h264_ld_url', 'ld'),
- ('stream_h264_url', 'standard'),
- ('stream_h264_hq_url', 'hq'),
- ('stream_h264_hd_url', 'hd'),
- ('stream_h264_hd1080_url', 'hd1080'),
- ]
-
- _TESTS = [{
- 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
- 'md5': '074b95bdee76b9e3654137aee9c79dfe',
- 'info_dict': {
- 'id': 'x5kesuj',
- 'ext': 'mp4',
- 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
- 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
- 'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
- 'duration': 187,
- 'timestamp': 1493651285,
- 'upload_date': '20170501',
- 'uploader': 'Deadline',
- 'uploader_id': 'x1xm8ri',
- 'age_limit': 0,
- },
- }, {
- 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
- 'md5': '2137c41a8e78554bb09225b8eb322406',
- 'info_dict': {
- 'id': 'x2iuewm',
- 'ext': 'mp4',
- 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
- 'description': 'Several come bundled with the Steam Controller.',
- 'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
- 'duration': 74,
- 'timestamp': 1425657362,
- 'upload_date': '20150306',
- 'uploader': 'IGN',
- 'uploader_id': 'xijv66',
- 'age_limit': 0,
- 'view_count': int,
- },
- 'skip': 'video gone',
- }, {
- # Vevo video
- 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
- 'info_dict': {
- 'title': 'Roar (Official)',
- 'id': 'USUV71301934',
- 'ext': 'mp4',
- 'uploader': 'Katy Perry',
- 'upload_date': '20130905',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'VEVO is only available in some countries',
- }, {
- # age-restricted video
- 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
- 'md5': '0d667a7b9cebecc3c89ee93099c4159d',
- 'info_dict': {
- 'id': 'xyh2zz',
- 'ext': 'mp4',
- 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
- 'uploader': 'HotWaves1012',
- 'age_limit': 18,
- },
- 'skip': 'video gone',
- }, {
- # geo-restricted, player v5
- 'url': 'http://www.dailymotion.com/video/xhza0o',
- 'only_matching': True,
- }, {
- # with subtitles
- 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
- 'only_matching': True,
- }, {
- 'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
- 'only_matching': True,
- }, {
- 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
- 'only_matching': True,
- }, {
- 'url': 'https://www.lequipe.fr/video/x791mem',
- 'only_matching': True,
- }, {
- 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- urls = []
- # Look for embedded Dailymotion player
- # https://developer.dailymotion.com/player#player-parameters
- for mobj in re.finditer(
- r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
- urls.append(unescapeHTML(mobj.group('url')))
- for mobj in re.finditer(
- r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
- urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
- return urls
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage_no_ff(
- 'https://www.dailymotion.com/video/%s' % video_id, video_id)
-
- age_limit = self._rta_search(webpage)
-
- description = self._og_search_description(
- webpage, default=None) or self._html_search_meta(
- 'description', webpage, 'description')
-
- view_count_str = self._search_regex(
- (r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
- r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
- webpage, 'view count', default=None)
- if view_count_str:
- view_count_str = re.sub(r'\s', '', view_count_str)
- view_count = str_to_int(view_count_str)
- comment_count = int_or_none(self._search_regex(
- r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
- webpage, 'comment count', default=None))
-
- player_v5 = self._search_regex(
- [r'buildPlayer\(({.+?})\);\n', # See https://github.com/ytdl-org/youtube-dl/issues/7826
- r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
- r'buildPlayer\(({.+?})\);',
- r'var\s+config\s*=\s*({.+?});',
- # New layout regex (see https://github.com/ytdl-org/youtube-dl/issues/13580)
- r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
- webpage, 'player v5', default=None)
- if player_v5:
- player = self._parse_json(player_v5, video_id, fatal=False) or {}
- metadata = try_get(player, lambda x: x['metadata'], dict)
- if not metadata:
- metadata_url = url_or_none(try_get(
- player, lambda x: x['context']['metadata_template_url1']))
- if metadata_url:
- metadata_url = metadata_url.replace(':videoId', video_id)
- else:
- metadata_url = update_url_query(
- 'https://www.dailymotion.com/player/metadata/video/%s'
- % video_id, {
- 'embedder': url,
- 'integration': 'inline',
- 'GK_PV5_NEON': '1',
- })
- metadata = self._download_json(
- metadata_url, video_id, 'Downloading metadata JSON')
-
- if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
- password = self._downloader.params.get('videopassword')
- if password:
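- # Re-request metadata under a password-derived pseudo id built below:
- # 'p' + b64url(md5(password + numeric_id + nonce)) + nonce + b64url(packed id).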
- r = int(metadata['id'][1:], 36)
- us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
- t = ''.join(random.choice(string.ascii_letters) for i in range(10))
- n = us64e(compat_struct_pack('I', r))
- i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
- metadata = self._download_json(
- 'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
-
- self._check_error(metadata)
-
- formats = []
- for quality, media_list in metadata['qualities'].items():
- for media in media_list:
- media_url = media.get('url')
- if not media_url:
- continue
- type_ = media.get('type')
- if type_ == 'application/vnd.lumberjack.manifest':
- continue
- ext = mimetype2ext(type_) or determine_ext(media_url)
- if ext == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- media_url, video_id, 'mp4', preference=-1,
- m3u8_id='hls', fatal=False)
- for f in m3u8_formats:
- f['url'] = f['url'].split('#')[0]
- formats.append(f)
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
- else:
- f = {
- 'url': media_url,
- 'format_id': 'http-%s' % quality,
- 'ext': ext,
- }
- m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
- if m:
- f.update({
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
- formats.append(f)
- self._sort_formats(formats)
-
- title = metadata['title']
- duration = int_or_none(metadata.get('duration'))
- timestamp = int_or_none(metadata.get('created_time'))
- thumbnail = metadata.get('poster_url')
- uploader = metadata.get('owner', {}).get('screenname')
- uploader_id = metadata.get('owner', {}).get('id')
-
- subtitles = {}
- subtitles_data = metadata.get('subtitles', {}).get('data', {})
- if subtitles_data and isinstance(subtitles_data, dict):
- for subtitle_lang, subtitle in subtitles_data.items():
- subtitles[subtitle_lang] = [{
- 'ext': determine_ext(subtitle_url),
- 'url': subtitle_url,
- } for subtitle_url in subtitle.get('urls', [])]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'age_limit': age_limit,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- # vevo embed
- vevo_id = self._search_regex(
- r'<link rel="video_src" href="[^"]*?vevo\.com[^"]*?video=(?P<id>[\w]*)',
- webpage, 'vevo embed', default=None)
- if vevo_id:
- return self.url_result('vevo:%s' % vevo_id, 'Vevo')
-
- # fallback old player
- embed_page = self._download_webpage_no_ff(
- 'https://www.dailymotion.com/embed/video/%s' % video_id,
- video_id, 'Downloading embed page')
-
- timestamp = parse_iso8601(self._html_search_meta(
- 'video:release_date', webpage, 'upload date'))
-
- info = self._parse_json(
- self._search_regex(
- r'var info = ({.*?}),$', embed_page,
- 'video info', flags=re.MULTILINE),
- video_id)
-
- self._check_error(info)
-
- formats = []
- for (key, format_id) in self._FORMATS:
- video_url = info.get(key)
- if video_url is not None:
- m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
- if m_size is not None:
- width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
- else:
- width, height = None, None
- formats.append({
- 'url': video_url,
- 'ext': 'mp4',
- 'format_id': format_id,
- 'width': width,
- 'height': height,
- })
- self._sort_formats(formats)
-
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, webpage)
-
- title = self._og_search_title(webpage, default=None)
- if title is None:
- title = self._html_search_regex(
- r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
- 'title')
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'uploader': info['owner.screenname'],
- 'timestamp': timestamp,
- 'title': title,
- 'description': description,
- 'subtitles': video_subtitles,
- 'thumbnail': info['thumbnail_url'],
- 'age_limit': age_limit,
- 'view_count': view_count,
- 'duration': info['duration']
- }
-
- def _check_error(self, info):
- error = info.get('error')
- if error:
- title = error.get('title') or error['message']
- # See https://developer.dailymotion.com/api#access-error
- if error.get('code') == 'DM007':
- self.raise_geo_restricted(msg=title)
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, title), expected=True)
-
- def _get_subtitles(self, video_id, webpage):
- try:
- sub_list = self._download_webpage(
- 'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
- video_id, note=False)
- except ExtractorError as err:
- self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
- return {}
- info = json.loads(sub_list)
- if info['total'] > 0:
- sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
- return sub_lang_list
- self._downloader.report_warning('video doesn\'t have subtitles')
- return {}
-
-
-class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
- IE_NAME = 'dailymotion:playlist'
- _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
- _TESTS = [{
- 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
- 'info_dict': {
- 'title': 'SPORT',
- 'id': 'xv4bw',
- },
- 'playlist_mincount': 20,
- }]
- _PAGE_SIZE = 100
-
- def _fetch_page(self, playlist_id, authorization, page):
- page += 1
- videos = self._download_json(
- 'https://graphql.api.dailymotion.com',
- playlist_id, 'Downloading page %d' % page,
- data=json.dumps({
- 'query': '''{
- collection(xid: "%s") {
- videos(first: %d, page: %d) {
- pageInfo {
- hasNextPage
- nextPage
- }
- edges {
- node {
- xid
- url
- }
- }
- }
- }
-}''' % (playlist_id, self._PAGE_SIZE, page)
- }).encode(), headers={
- 'Authorization': authorization,
- 'Origin': 'https://www.dailymotion.com',
- })['data']['collection']['videos']
- for edge in videos['edges']:
- node = edge['node']
- yield self.url_result(
- node['url'], DailymotionIE.ie_key(), node['xid'])
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- api = self._parse_json(self._search_regex(
- r'__PLAYER_CONFIG__\s*=\s*({.+?});',
- webpage, 'player config'), playlist_id)['context']['api']
- auth = self._download_json(
- api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
- playlist_id, data=urlencode_postdata({
- 'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
- 'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
- 'grant_type': 'client_credentials',
- }))
- authorization = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
- entries = OnDemandPagedList(functools.partial(
- self._fetch_page, playlist_id, authorization), self._PAGE_SIZE)
- return self.playlist_result(
- entries, playlist_id,
- self._og_search_title(webpage))
-
-
-class DailymotionUserIE(DailymotionBaseInfoExtractor):
- IE_NAME = 'dailymotion:user'
- _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
- _MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
- _PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
- _TESTS = [{
- 'url': 'https://www.dailymotion.com/user/nqtv',
- 'info_dict': {
- 'id': 'nqtv',
- 'title': 'Rémi Gaillard',
- },
- 'playlist_mincount': 100,
- }, {
- 'url': 'http://www.dailymotion.com/user/UnderProject',
- 'info_dict': {
- 'id': 'UnderProject',
- 'title': 'UnderProject',
- },
- 'playlist_mincount': 1800,
- 'expected_warnings': [
- 'Stopped at duplicated page',
- ],
- 'skip': 'Takes too long',
- }]
-
- def _extract_entries(self, id):
- video_ids = set()
- processed_urls = set()
- for pagenum in itertools.count(1):
- page_url = self._PAGE_TEMPLATE % (id, pagenum)
- webpage, urlh = self._download_webpage_handle_no_ff(
- page_url, id, 'Downloading page %s' % pagenum)
- if urlh.geturl() in processed_urls:
- self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
- page_url, urlh.geturl()), id)
- break
-
- processed_urls.add(urlh.geturl())
-
- for video_id in re.findall(r'data-xid="(.+?)"', webpage):
- if video_id not in video_ids:
- yield self.url_result(
- 'http://www.dailymotion.com/video/%s' % video_id,
- DailymotionIE.ie_key(), video_id)
- video_ids.add(video_id)
-
- if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
- break
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user = mobj.group('user')
- webpage = self._download_webpage(
- 'https://www.dailymotion.com/user/%s' % user, user)
- full_user = unescapeHTML(self._html_search_regex(
- r'<a class="nav-image" title="([^"]+)" href="/%s">' % re.escape(user),
- webpage, 'user'))
-
- return {
- '_type': 'playlist',
- 'id': user,
- 'title': full_user,
- 'entries': self._extract_entries(user),
- }
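
Note: the playlist extractor above pages through Dailymotion's GraphQL collection field and hands the fetcher to OnDemandPagedList, so pages are only requested when a slice actually touches them. A minimal standalone sketch of the same pattern, using urllib from the standard library in place of the extractor helpers (the endpoint, query shape and headers are taken from _fetch_page above):

    import functools
    import json
    import urllib.request

    PAGE_SIZE = 100

    QUERY = '''{
      collection(xid: "%s") {
        videos(first: %d, page: %d) {
          pageInfo { hasNextPage nextPage }
          edges { node { xid url } }
        }
      }
    }'''

    def fetch_page(playlist_id, authorization, page):
        # OnDemandPagedList passes 0-based pages; the API is 1-based.
        body = json.dumps(
            {'query': QUERY % (playlist_id, PAGE_SIZE, page + 1)}).encode()
        req = urllib.request.Request(
            'https://graphql.api.dailymotion.com', data=body,
            headers={'Authorization': authorization,
                     'Origin': 'https://www.dailymotion.com'})
        videos = json.loads(urllib.request.urlopen(req).read())[
            'data']['collection']['videos']
        return [edge['node']['url'] for edge in videos['edges']]

    # entries = OnDemandPagedList(
    #     functools.partial(fetch_page, playlist_id, authorization), PAGE_SIZE)
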
diff --git a/youtube_dl/extractor/daisuki.py b/youtube_dl/extractor/daisuki.py
deleted file mode 100644
index dbc1aa5d4..000000000
--- a/youtube_dl/extractor/daisuki.py
+++ /dev/null
@@ -1,154 +0,0 @@
-from __future__ import unicode_literals
-
-import base64
-import json
-import random
-import re
-
-from .common import InfoExtractor
-from ..aes import (
- aes_cbc_decrypt,
- aes_cbc_encrypt,
-)
-from ..compat import compat_b64decode
-from ..utils import (
- bytes_to_intlist,
- bytes_to_long,
- extract_attributes,
- ExtractorError,
- intlist_to_bytes,
- js_to_json,
- int_or_none,
- long_to_bytes,
- pkcs1pad,
-)
-
-
-class DaisukiMottoIE(InfoExtractor):
- _VALID_URL = r'https?://motto\.daisuki\.net/framewatch/embed/[^/]+/(?P<id>[0-9a-zA-Z]{3})'
-
- _TEST = {
- 'url': 'http://motto.daisuki.net/framewatch/embed/embedDRAGONBALLSUPERUniverseSurvivalsaga/V2e/760/428',
- 'info_dict': {
- 'id': 'V2e',
- 'ext': 'mp4',
- 'title': '#117 SHOWDOWN OF LOVE! ANDROIDS VS UNIVERSE 2!!',
- 'subtitles': {
- 'mul': [{
- 'ext': 'ttml',
- }],
- },
- },
- 'params': {
- 'skip_download': True, # AES-encrypted HLS stream
- },
- }
-
- # The public key in PEM format can be found in clientlibs_anime_watch.min.js
- _RSA_KEY = (0xc5524c25e8e14b366b3754940beeb6f96cb7e2feef0b932c7659a0c5c3bf173d602464c2df73d693b513ae06ff1be8f367529ab30bf969c5640522181f2a0c51ea546ae120d3d8d908595e4eff765b389cde080a1ef7f1bbfb07411cc568db73b7f521cedf270cbfbe0ddbc29b1ac9d0f2d8f4359098caffee6d07915020077d, 65537)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- flashvars = self._parse_json(self._search_regex(
- r'(?s)var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
- video_id, transform_source=js_to_json)
-
- iv = [0] * 16
-
- data = {}
- for key in ('device_cd', 'mv_id', 'ss1_prm', 'ss2_prm', 'ss3_prm', 'ss_id'):
- data[key] = flashvars.get(key, '')
-
- encrypted_rtn = None
-
- # Some AES keys are rejected, so retry with different randomly generated keys
- for idx in range(5):
- aes_key = [random.randint(0, 254) for _ in range(32)]
- padded_aeskey = intlist_to_bytes(pkcs1pad(aes_key, 128))
-
- n, e = self._RSA_KEY
- encrypted_aeskey = long_to_bytes(pow(bytes_to_long(padded_aeskey), e, n))
- init_data = self._download_json(
- 'http://motto.daisuki.net/fastAPI/bgn/init/',
- video_id, query={
- 's': flashvars.get('s', ''),
- 'c': flashvars.get('ss3_prm', ''),
- 'e': url,
- 'd': base64.b64encode(intlist_to_bytes(aes_cbc_encrypt(
- bytes_to_intlist(json.dumps(data)),
- aes_key, iv))).decode('ascii'),
- 'a': base64.b64encode(encrypted_aeskey).decode('ascii'),
- }, note='Downloading JSON metadata' + (' (try #%d)' % (idx + 1) if idx > 0 else ''))
-
- if 'rtn' in init_data:
- encrypted_rtn = init_data['rtn']
- break
-
- self._sleep(5, video_id)
-
- if encrypted_rtn is None:
- raise ExtractorError('Failed to fetch init data')
-
- rtn = self._parse_json(
- intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(
- compat_b64decode(encrypted_rtn)),
- aes_key, iv)).decode('utf-8').rstrip('\0'),
- video_id)
-
- title = rtn['title_str']
-
- formats = self._extract_m3u8_formats(
- rtn['play_url'], video_id, ext='mp4', entry_protocol='m3u8_native')
-
- subtitles = {}
- caption_url = rtn.get('caption_url')
- if caption_url:
- # mul: multiple languages
- subtitles['mul'] = [{
- 'url': caption_url,
- 'ext': 'ttml',
- }]
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class DaisukiMottoPlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://motto\.daisuki\.net/(?P<id>information)/'
-
- _TEST = {
- 'url': 'http://motto.daisuki.net/information/',
- 'info_dict': {
- 'title': 'DRAGON BALL SUPER',
- },
- 'playlist_mincount': 117,
- }
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- entries = []
- for li in re.findall(r'(<li[^>]+?data-product_id="[a-zA-Z0-9]{3}"[^>]+>)', webpage):
- attr = extract_attributes(li)
- ad_id = attr.get('data-ad_id')
- product_id = attr.get('data-product_id')
- if ad_id and product_id:
- episode_id = attr.get('data-chapter')
- entries.append({
- '_type': 'url_transparent',
- 'url': 'http://motto.daisuki.net/framewatch/embed/%s/%s/760/428' % (ad_id, product_id),
- 'episode_id': episode_id,
- 'episode_number': int_or_none(episode_id),
- 'ie_key': 'DaisukiMotto',
- })
-
- return self.playlist_result(entries, playlist_title='DRAGON BALL SUPER')
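
Note: the key exchange above is a classic hybrid scheme: a random AES-256 session key is PKCS#1-padded and encrypted with the site's RSA public key by raw modular exponentiation, the request payload is AES-CBC-encrypted under that key, and the server's reply is decrypted with the same key. A sketch of both halves, reusing youtube_dl's own aes/utils helpers (the all-zero IV and NUL-padded reply mirror the code above):

    import base64
    import json
    import random

    from youtube_dl.aes import aes_cbc_decrypt, aes_cbc_encrypt
    from youtube_dl.utils import (
        bytes_to_intlist, bytes_to_long, intlist_to_bytes, long_to_bytes,
        pkcs1pad)

    IV = [0] * 16  # the site uses a fixed all-zero IV

    def make_handshake(rsa_n, rsa_e, payload):
        # Random AES-256 session key, RSA-wrapped: c = m^e mod n over the
        # PKCS#1-padded key (128 bytes = the 1024-bit modulus size).
        aes_key = [random.randint(0, 254) for _ in range(32)]
        encrypted_key = long_to_bytes(pow(
            bytes_to_long(intlist_to_bytes(pkcs1pad(aes_key, 128))),
            rsa_e, rsa_n))
        ciphertext = intlist_to_bytes(aes_cbc_encrypt(
            bytes_to_intlist(json.dumps(payload)), aes_key, IV))
        return (aes_key,
                base64.b64encode(ciphertext).decode('ascii'),     # 'd' param
                base64.b64encode(encrypted_key).decode('ascii'))  # 'a' param

    def read_reply(aes_key, encrypted_rtn):
        # The server answers under the same session key, NUL-padded.
        plain = intlist_to_bytes(aes_cbc_decrypt(
            bytes_to_intlist(base64.b64decode(encrypted_rtn)), aes_key, IV))
        return json.loads(plain.decode('utf-8').rstrip('\0'))
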
diff --git a/youtube_dl/extractor/daum.py b/youtube_dl/extractor/daum.py
deleted file mode 100644
index 76f021892..000000000
--- a/youtube_dl/extractor/daum.py
+++ /dev/null
@@ -1,308 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import re
-import itertools
-
-from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlencode,
- compat_urlparse,
-)
-from ..utils import (
- int_or_none,
- str_to_int,
- xpath_text,
- unescapeHTML,
-)
-
-
-class DaumIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
- IE_NAME = 'daum.net'
-
- _TESTS = [{
- 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
- 'info_dict': {
- 'id': 'vab4dyeDBysyBssyukBUjBz',
- 'ext': 'mp4',
- 'title': '마크 헌트 vs 안토니오 실바',
- 'description': 'Mark Hunt vs Antonio Silva',
- 'upload_date': '20131217',
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
- 'duration': 2117,
- 'view_count': int,
- 'comment_count': int,
- },
- }, {
- 'url': 'http://m.tvpot.daum.net/v/65139429',
- 'info_dict': {
- 'id': '65139429',
- 'ext': 'mp4',
- 'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
- 'description': 'md5:79794514261164ff27e36a21ad229fc5',
- 'upload_date': '20150604',
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
- 'duration': 154,
- 'view_count': int,
- 'comment_count': int,
- },
- }, {
- 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
- 'only_matching': True,
- }, {
- 'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
- 'info_dict': {
- 'id': 'vwIpVpCQsT8$',
- 'ext': 'flv',
- 'title': '01-Korean War ( Trouble on the horizon )',
- 'description': '\nKorean War 01\nTrouble on the horizon\n전쟁의 먹구름',
- 'upload_date': '20080223',
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
- 'duration': 249,
- 'view_count': int,
- 'comment_count': int,
- },
- }, {
- # Requires dte_type=WEB (#9972)
- 'url': 'http://tvpot.daum.net/v/s3794Uf1NZeZ1qMpGpeqeRU',
- 'md5': 'a8917742069a4dd442516b86e7d66529',
- 'info_dict': {
- 'id': 's3794Uf1NZeZ1qMpGpeqeRU',
- 'ext': 'mp4',
- 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny) [쇼! 음악중심] 508회 20160611',
- 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\n\n[쇼! 음악중심] 20160611, 507회',
- 'upload_date': '20160611',
- },
- }]
-
- def _real_extract(self, url):
- video_id = compat_urllib_parse_unquote(self._match_id(url))
- movie_data = self._download_json(
- 'http://videofarm.daum.net/controller/api/closed/v1_2/IntegratedMovieData.json',
- video_id, 'Downloading video formats info', query={'vid': video_id, 'dte_type': 'WEB'})
-
- # For urls like http://m.tvpot.daum.net/v/65139429, where the video_id is really a clipid
- if not movie_data.get('output_list', {}).get('output_list') and re.match(r'^\d+$', video_id):
- return self.url_result('http://tvpot.daum.net/clip/ClipView.do?clipid=%s' % video_id)
-
- info = self._download_xml(
- 'http://tvpot.daum.net/clip/ClipInfoXml.do', video_id,
- 'Downloading video info', query={'vid': video_id})
-
- formats = []
- for format_el in movie_data['output_list']['output_list']:
- profile = format_el['profile']
- format_query = compat_urllib_parse_urlencode({
- 'vid': video_id,
- 'profile': profile,
- })
- url_doc = self._download_xml(
- 'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
- video_id, note='Downloading video data for %s format' % profile)
- format_url = url_doc.find('result/url').text
- formats.append({
- 'url': format_url,
- 'format_id': profile,
- 'width': int_or_none(format_el.get('width')),
- 'height': int_or_none(format_el.get('height')),
- 'filesize': int_or_none(format_el.get('filesize')),
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': info.find('TITLE').text,
- 'formats': formats,
- 'thumbnail': xpath_text(info, 'THUMB_URL'),
- 'description': xpath_text(info, 'CONTENTS'),
- 'duration': int_or_none(xpath_text(info, 'DURATION')),
- 'upload_date': info.find('REGDTTM').text[:8],
- 'view_count': str_to_int(xpath_text(info, 'PLAY_CNT')),
- 'comment_count': str_to_int(xpath_text(info, 'COMMENT_CNT')),
- }
-
-
-class DaumClipIE(InfoExtractor):
- _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
- IE_NAME = 'daum.net:clip'
- _URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
-
- _TESTS = [{
- 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
- 'info_dict': {
- 'id': '52554690',
- 'ext': 'mp4',
- 'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
- 'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
- 'upload_date': '20130831',
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
- 'duration': 3868,
- 'view_count': int,
- },
- }, {
- 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super(DaumClipIE, cls).suitable(url)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- clip_info = self._download_json(
- 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?clipid=%s' % video_id,
- video_id, 'Downloading clip info')['clip_bean']
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'url': 'http://tvpot.daum.net/v/%s' % clip_info['vid'],
- 'title': unescapeHTML(clip_info['title']),
- 'thumbnail': clip_info.get('thumb_url'),
- 'description': clip_info.get('contents'),
- 'duration': int_or_none(clip_info.get('duration')),
- 'upload_date': (clip_info.get('up_date') or '')[:8] or None,
- 'view_count': int_or_none(clip_info.get('play_count')),
- 'ie_key': 'Daum',
- }
-
-
-class DaumListIE(InfoExtractor):
- def _get_entries(self, list_id, list_id_type):
- name = None
- entries = []
- for pagenum in itertools.count(1):
- list_info = self._download_json(
- 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
- pagenum, list_id_type, list_id), list_id, 'Downloading list info - %s' % pagenum)
-
- entries.extend([
- self.url_result(
- 'http://tvpot.daum.net/v/%s' % clip['vid'])
- for clip in list_info['clip_list']
- ])
-
- if not name:
- name = list_info.get('playlist_bean', {}).get('name') or \
- list_info.get('potInfo', {}).get('name')
-
- if not list_info.get('has_more'):
- break
-
- return name, entries
-
- def _check_clip(self, url, list_id):
- query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
- if 'clipid' in query_dict:
- clip_id = query_dict['clipid'][0]
- if self._downloader.params.get('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
- return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
- else:
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
-
-
-class DaumPlaylistIE(DaumListIE):
- _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)'
- IE_NAME = 'daum.net:playlist'
- _URL_TEMPLATE = 'http://tvpot.daum.net/mypot/View.do?playlistid=%s'
-
- _TESTS = [{
- 'note': 'Playlist url with clipid',
- 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
- 'info_dict': {
- 'id': '6213966',
- 'title': 'Woorissica Official',
- },
- 'playlist_mincount': 181
- }, {
- 'note': 'Playlist url with clipid - noplaylist',
- 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
- 'info_dict': {
- 'id': '73806844',
- 'ext': 'mp4',
- 'title': '151017 Airport',
- 'upload_date': '20160117',
- },
- 'params': {
- 'noplaylist': True,
- 'skip_download': True,
- }
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if DaumUserIE.suitable(url) else super(DaumPlaylistIE, cls).suitable(url)
-
- def _real_extract(self, url):
- list_id = self._match_id(url)
-
- clip_result = self._check_clip(url, list_id)
- if clip_result:
- return clip_result
-
- name, entries = self._get_entries(list_id, 'playlistid')
-
- return self.playlist_result(entries, list_id, name)
-
-
-class DaumUserIE(DaumListIE):
- _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.(?:do|tv)\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)'
- IE_NAME = 'daum.net:user'
-
- _TESTS = [{
- 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0',
- 'info_dict': {
- 'id': 'o2scDLIVbHc0',
- 'title': '마이 리틀 텔레비전',
- },
- 'playlist_mincount': 213
- }, {
- 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
- 'info_dict': {
- 'id': '73801156',
- 'ext': 'mp4',
- 'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116',
- 'upload_date': '20160117',
- 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36'
- },
- 'params': {
- 'noplaylist': True,
- 'skip_download': True,
- }
- }, {
- 'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence',
- 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631',
- 'info_dict': {
- 'id': '6196631',
- 'title': '마이 리틀 텔레비전 - 20160109',
- },
- 'playlist_count': 11
- }, {
- 'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0',
- 'only_matching': True,
- }, {
- 'url': 'http://m.tvpot.daum.net/mypot/Top.tv?ownerid=45x1okb1If50&playlistid=3569733',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- list_id = self._match_id(url)
-
- clip_result = self._check_clip(url, list_id)
- if clip_result:
- return clip_result
-
- query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
- if 'playlistid' in query_dict:
- playlist_id = query_dict['playlistid'][0]
- return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist')
-
- name, entries = self._get_entries(list_id, 'ownerid')
-
- return self.playlist_result(entries, list_id, name)
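
Note: all three Daum extractors route through the same query-string check: a URL can carry clipid, playlistid and ownerid at once, and --no-playlist flips the preference to the single clip. A standalone sketch of that routing, using urllib.parse in place of the compat shims:

    from urllib.parse import parse_qs, urlparse

    def route_daum_url(url, noplaylist=False):
        # Mirrors DaumListIE._check_clip plus the playlistid-over-ownerid
        # precedence from DaumUserIE._real_extract.
        qs = parse_qs(urlparse(url).query)
        if 'clipid' in qs and noplaylist:
            return ('DaumClip', qs['clipid'][0])
        if 'playlistid' in qs:
            return ('DaumPlaylist', qs['playlistid'][0])
        if 'ownerid' in qs:
            return ('DaumUser', qs['ownerid'][0])
        return (None, None)

    # route_daum_url(
    #     'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
    #     noplaylist=True) -> ('DaumClip', '73801156')
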
diff --git a/youtube_dl/extractor/dctp.py b/youtube_dl/extractor/dctp.py
deleted file mode 100644
index 04ff214f7..000000000
--- a/youtube_dl/extractor/dctp.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- float_or_none,
- int_or_none,
- unified_timestamp,
- url_or_none,
-)
-
-
-class DctpTvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
- _TESTS = [{
- # 4x3
- 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
- 'info_dict': {
- 'id': '95eaa4f33dad413aa17b4ee613cccc6c',
- 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
- 'ext': 'flv',
- 'title': 'Videoinstallation für eine Kaufhausfassade',
- 'description': 'Kurzfilm',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 71.24,
- 'timestamp': 1302172322,
- 'upload_date': '20110407',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- }, {
- # 16x9
- 'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
- 'only_matching': True,
- }]
-
- _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- version = self._download_json(
- '%s/version.json' % self._BASE_URL, display_id,
- 'Downloading version JSON')
-
- restapi_base = '%s/%s/restapi' % (
- self._BASE_URL, version['version_name'])
-
- info = self._download_json(
- '%s/slugs/%s.json' % (restapi_base, display_id), display_id,
- 'Downloading video info JSON')
-
- media = self._download_json(
- '%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
- display_id, 'Downloading media JSON')
-
- uuid = media['uuid']
- title = media['title']
- ratio = '16x9' if media.get('is_wide') else '4x3'
- play_path = 'mp4:%s_dctp_0500_%s.m4v' % (uuid, ratio)
-
- servers = self._download_json(
- 'http://www.dctp.tv/streaming_servers/', display_id,
- note='Downloading server list JSON', fatal=False)
-
- if servers:
- endpoint = next(
- server['endpoint']
- for server in servers
- if url_or_none(server.get('endpoint'))
- and 'cloudfront' in server['endpoint'])
- else:
- endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
-
- app = self._search_regex(
- r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
-
- formats = [{
- 'url': endpoint,
- 'app': app,
- 'play_path': play_path,
- 'page_url': url,
- 'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-110.swf',
- 'ext': 'flv',
- }]
-
- thumbnails = []
- images = media.get('images')
- if isinstance(images, list):
- for image in images:
- if not isinstance(image, dict):
- continue
- image_url = url_or_none(image.get('url'))
- if not image_url:
- continue
- thumbnails.append({
- 'url': image_url,
- 'width': int_or_none(image.get('width')),
- 'height': int_or_none(image.get('height')),
- })
-
- return {
- 'id': uuid,
- 'display_id': display_id,
- 'title': title,
- 'alt_title': media.get('subtitle'),
- 'description': media.get('description') or media.get('teaser'),
- 'timestamp': unified_timestamp(media.get('created')),
- 'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
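
Note: the streaming-server selection above boils down to "first CloudFront endpoint, else a hardcoded RTMP fallback", with the RTMP app derived from the endpoint path. A sketch of the same selection (the fallback endpoint and regex come from the code above; servers is a plain list of dicts here, and a str check stands in for url_or_none):

    import re

    FALLBACK = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'

    def pick_endpoint(servers):
        # First CloudFront endpoint wins; next() with a default avoids
        # StopIteration when the list is empty or missing.
        endpoint = next(
            (s['endpoint'] for s in servers or []
             if isinstance(s.get('endpoint'), str)
             and 'cloudfront' in s['endpoint']),
            FALLBACK)
        # The RTMP "app" is everything after the host part of the endpoint.
        app = re.match(r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint).group('app')
        return endpoint, app

    # pick_endpoint(None)
    # -> ('rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/', 'cfx/st/')
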
diff --git a/youtube_dl/extractor/discovery.py b/youtube_dl/extractor/discovery.py
deleted file mode 100644
index 6a2712cc5..000000000
--- a/youtube_dl/extractor/discovery.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from __future__ import unicode_literals
-
-import random
-import re
-import string
-
-from .discoverygo import DiscoveryGoBaseIE
-from ..compat import compat_urllib_parse_unquote
-from ..utils import ExtractorError
-from ..compat import compat_HTTPError
-
-
-class DiscoveryIE(DiscoveryGoBaseIE):
- _VALID_URL = r'''(?x)https?://
- (?P<site>
- (?:(?:www|go)\.)?discovery|
- (?:www\.)?
- (?:
- investigationdiscovery|
- discoverylife|
- animalplanet|
- ahctv|
- destinationamerica|
- sciencechannel|
- tlc|
- velocity
- )|
- watch\.
- (?:
- hgtv|
- foodnetwork|
- travelchannel|
- diynetwork|
- cookingchanneltv|
- motortrend
- )
- )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
- _TESTS = [{
- 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
- 'info_dict': {
- 'id': '5a2f35ce6b66d17a5026e29e',
- 'ext': 'mp4',
- 'title': 'Riding with Matthew Perry',
- 'description': 'md5:a34333153e79bc4526019a5129e7f878',
- 'duration': 84,
- },
- 'params': {
- 'skip_download': True, # requires ffmpeg
- }
- }, {
- 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
- 'only_matching': True,
- }, {
- 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
- 'only_matching': True,
- }, {
- # using `show_slug` is important to get the correct video data
- 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
- 'only_matching': True,
- }]
- _GEO_COUNTRIES = ['US']
- _GEO_BYPASS = False
- _API_BASE_URL = 'https://api.discovery.com/v1/'
-
- def _real_extract(self, url):
- site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
-
- access_token = None
- cookies = self._get_cookies(url)
-
- # prefer Affiliate Auth Token over Anonymous Auth Token
- auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
- if auth_storage_cookie and auth_storage_cookie.value:
- auth_storage = self._parse_json(compat_urllib_parse_unquote(
- compat_urllib_parse_unquote(auth_storage_cookie.value)),
- display_id, fatal=False) or {}
- access_token = auth_storage.get('a') or auth_storage.get('access_token')
-
- if not access_token:
- access_token = self._download_json(
- 'https://%s.com/anonymous' % site, display_id,
- 'Downloading token JSON metadata', query={
- 'authRel': 'authorization',
- 'client_id': '3020a40c2356a645b4b4',
- 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
- 'redirectUri': 'https://fusion.ddmcdn.com/app/mercury-sdk/180/redirectHandler.html?https://www.%s.com' % site,
- })['access_token']
-
- headers = self.geo_verification_headers()
- headers['Authorization'] = 'Bearer ' + access_token
-
- try:
- video = self._download_json(
- self._API_BASE_URL + 'content/videos',
- display_id, 'Downloading content JSON metadata',
- headers=headers, query={
- 'embed': 'show.name',
- 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
- 'slug': display_id,
- 'show_slug': show_slug,
- })[0]
- video_id = video['id']
- stream = self._download_json(
- self._API_BASE_URL + 'streaming/video/' + video_id,
- display_id, 'Downloading streaming JSON metadata', headers=headers)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
- e_description = self._parse_json(
- e.cause.read().decode(), display_id)['description']
- if 'resource not available for country' in e_description:
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
- if 'Authorized Networks' in e_description:
- raise ExtractorError(
- 'This video is only available via cable service provider subscription that'
- ' is not currently supported. You may want to use --cookies.', expected=True)
- raise ExtractorError(e_description)
- raise
-
- return self._extract_video_info(video, stream, display_id)
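
Note: the token lookup above prefers an affiliate token already stored client-side over a freshly minted anonymous one; the cookie value is a twice URL-encoded JSON blob. A sketch of the cookie half, assuming cookies is a plain dict of string values (the extractor reads .value off a cookie jar):

    import json
    from urllib.parse import unquote

    def access_token_from_cookies(cookies):
        # Prefer the Affiliate Auth Token (eosAf) over the Anonymous one (eosAn).
        raw = cookies.get('eosAf') or cookies.get('eosAn')
        if not raw:
            return None
        try:
            # The value is URL-encoded twice before it yields JSON.
            auth_storage = json.loads(unquote(unquote(raw)))
        except ValueError:
            return None
        return auth_storage.get('a') or auth_storage.get('access_token')

    # access_token_from_cookies(
    #     {'eosAf': '%257B%2522a%2522%253A%2520%2522tok%2522%257D'}) -> 'tok'
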
diff --git a/youtube_dl/extractor/discoverynetworks.py b/youtube_dl/extractor/discoverynetworks.py
deleted file mode 100644
index fba1ef221..000000000
--- a/youtube_dl/extractor/discoverynetworks.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .brightcove import BrightcoveLegacyIE
-from .dplay import DPlayIE
-from ..compat import (
- compat_parse_qs,
- compat_urlparse,
-)
-from ..utils import smuggle_url
-
-
-class DiscoveryNetworksDeIE(DPlayIE):
- _VALID_URL = r'''(?x)https?://(?:www\.)?(?P<site>discovery|tlc|animalplanet|dmax)\.de/
- (?:
- .*\#(?P<id>\d+)|
- (?:[^/]+/)*videos/(?P<display_id>[^/?#]+)|
- programme/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)
- )'''
-
- _TESTS = [{
- 'url': 'http://www.tlc.de/sendungen/breaking-amish/videos/#3235167922001',
- 'info_dict': {
- 'id': '3235167922001',
- 'ext': 'mp4',
- 'title': 'Breaking Amish: Die Welt da draußen',
- 'description': (
- 'Vier Amische und eine Mennonitin wagen in New York'
- ' den Sprung in ein komplett anderes Leben. Begleitet sie auf'
- ' ihrem spannenden Weg.'),
- 'timestamp': 1396598084,
- 'upload_date': '20140404',
- 'uploader_id': '1659832546',
- },
- }, {
- 'url': 'http://www.dmax.de/programme/storage-hunters-uk/videos/storage-hunters-uk-episode-6/',
- 'only_matching': True,
- }, {
- 'url': 'http://www.discovery.de/#5332316765001',
- 'only_matching': True,
- }]
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- alternate_id = mobj.group('alternate_id')
- if alternate_id:
- self._initialize_geo_bypass({
- 'countries': ['DE'],
- })
- return self._get_disco_api_info(
- url, '%s/%s' % (mobj.group('programme'), alternate_id),
- 'sonic-eu1-prod.disco-api.com', mobj.group('site') + 'de')
- brightcove_id = mobj.group('id')
- if not brightcove_id:
- display_id = mobj.group('display_id')
- webpage = self._download_webpage(url, display_id)
- brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
- brightcove_id = compat_parse_qs(compat_urlparse.urlparse(
- brightcove_legacy_url).query)['@videoPlayer'][0]
- return self.url_result(smuggle_url(
- self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['DE']}),
- 'BrightcoveNew', brightcove_id)
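
Note: smuggle_url is how one extractor passes side-channel data (here the geo countries) to the extractor it delegates to; the receiver unpacks it with unsmuggle_url. A minimal round-trip, using the video id from the test above:

    from youtube_dl.utils import smuggle_url, unsmuggle_url

    TEMPLATE = 'http://players.brightcove.net/1659832546/default_default/index.html?videoId=%s'

    # The payload rides along in a URL fragment the receiver strips off.
    url = smuggle_url(TEMPLATE % '3235167922001', {'geo_countries': ['DE']})
    clean_url, data = unsmuggle_url(url)
    assert clean_url == TEMPLATE % '3235167922001'
    assert data == {'geo_countries': ['DE']}
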
diff --git a/youtube_dl/extractor/dplay.py b/youtube_dl/extractor/dplay.py
deleted file mode 100644
index ebf59512c..000000000
--- a/youtube_dl/extractor/dplay.py
+++ /dev/null
@@ -1,376 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import re
-import time
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
- float_or_none,
- int_or_none,
- remove_end,
- try_get,
- unified_strdate,
- unified_timestamp,
- update_url_query,
- urljoin,
- USER_AGENTS,
-)
-
-
-class DPlayIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
-
- _TESTS = [{
- # non geo restricted, via secure api, unsigned download hls URL
- 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
- 'info_dict': {
- 'id': '3172',
- 'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
- 'ext': 'mp4',
- 'title': 'Svensken lär sig njuta av livet',
- 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
- 'duration': 2650,
- 'timestamp': 1365454320,
- 'upload_date': '20130408',
- 'creator': 'Kanal 5 (Home)',
- 'series': 'Nugammalt - 77 händelser som format Sverige',
- 'season_number': 1,
- 'episode_number': 1,
- 'age_limit': 0,
- },
- }, {
- # geo restricted, via secure api, unsigned download hls URL
- 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
- 'info_dict': {
- 'id': '70816',
- 'display_id': 'mig-og-min-mor/season-6-episode-12',
- 'ext': 'mp4',
- 'title': 'Episode 12',
- 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
- 'duration': 2563,
- 'timestamp': 1429696800,
- 'upload_date': '20150422',
- 'creator': 'Kanal 4 (Home)',
- 'series': 'Mig og min mor',
- 'season_number': 6,
- 'episode_number': 12,
- 'age_limit': 0,
- },
- }, {
- # geo restricted, via direct unsigned hls URL
- 'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
- 'only_matching': True,
- }, {
- # disco-api
- 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
- 'info_dict': {
- 'id': '40206',
- 'display_id': 'i-kongens-klr/sesong-1-episode-7',
- 'ext': 'mp4',
- 'title': 'Episode 7',
- 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
- 'duration': 2611.16,
- 'timestamp': 1516726800,
- 'upload_date': '20180123',
- 'series': 'I kongens klær',
- 'season_number': 1,
- 'episode_number': 7,
- },
- 'params': {
- 'format': 'bestvideo',
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
- 'only_matching': True,
- }, {
- 'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
- 'only_matching': True,
- }]
-
- def _get_disco_api_info(self, url, display_id, disco_host, realm):
- disco_base = 'https://' + disco_host
- token = self._download_json(
- '%s/token' % disco_base, display_id, 'Downloading token',
- query={
- 'realm': realm,
- })['data']['attributes']['token']
- headers = {
- 'Referer': url,
- 'Authorization': 'Bearer ' + token,
- }
- video = self._download_json(
- '%s/content/videos/%s' % (disco_base, display_id), display_id,
- headers=headers, query={
- 'include': 'show'
- })
- video_id = video['data']['id']
- info = video['data']['attributes']
- title = info['name']
- formats = []
- for format_id, format_dict in self._download_json(
- '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
- display_id, headers=headers)['data']['attributes']['streaming'].items():
- if not isinstance(format_dict, dict):
- continue
- format_url = format_dict.get('url')
- if not format_url:
- continue
- ext = determine_ext(format_url)
- if format_id == 'dash' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- format_url, display_id, mpd_id='dash', fatal=False))
- elif format_id == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, display_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls',
- fatal=False))
- else:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- })
- self._sort_formats(formats)
-
- series = None
- try:
- included = video.get('included')
- if isinstance(included, list):
- show = next(e for e in included if e.get('type') == 'show')
- series = try_get(
- show, lambda x: x['attributes']['name'], compat_str)
- except StopIteration:
- pass
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': info.get('description'),
- 'duration': float_or_none(
- info.get('videoDuration'), scale=1000),
- 'timestamp': unified_timestamp(info.get('publishStart')),
- 'series': series,
- 'season_number': int_or_none(info.get('seasonNumber')),
- 'episode_number': int_or_none(info.get('episodeNumber')),
- 'age_limit': int_or_none(info.get('minimum_age')),
- 'formats': formats,
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('id')
- domain = mobj.group('domain')
-
- self._initialize_geo_bypass({
- 'countries': [mobj.group('country').upper()],
- })
-
- webpage = self._download_webpage(url, display_id)
-
- video_id = self._search_regex(
- r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
-
- if not video_id:
- host = mobj.group('host')
- return self._get_disco_api_info(
- url, display_id, 'disco-api.' + host, host.replace('.', ''))
-
- info = self._download_json(
- 'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
- video_id)['data'][0]
-
- title = info['title']
-
- PROTOCOLS = ('hls', 'hds')
- formats = []
-
- def extract_formats(protocol, manifest_url):
- if protocol == 'hls':
- m3u8_formats = self._extract_m3u8_formats(
- manifest_url, video_id, ext='mp4',
- entry_protocol='m3u8_native', m3u8_id=protocol, fatal=False)
- # Sometimes the final URLs inside the m3u8 are unsigned, so carry the
- # manifest's signed query over ourselves. Fragment URLs are also only
- # served signed for the Safari user agent.
- query = compat_urlparse.parse_qs(compat_urlparse.urlparse(manifest_url).query)
- for m3u8_format in m3u8_formats:
- m3u8_format.update({
- 'url': update_url_query(m3u8_format['url'], query),
- 'http_headers': {
- 'User-Agent': USER_AGENTS['Safari'],
- },
- })
- formats.extend(m3u8_formats)
- elif protocol == 'hds':
- formats.extend(self._extract_f4m_formats(
- manifest_url + '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
- video_id, f4m_id=protocol, fatal=False))
-
- domain_tld = domain.split('.')[-1]
- if domain_tld in ('se', 'dk', 'no'):
- for protocol in PROTOCOLS:
- # Providing dsc-geo allows bypassing geo restriction in some cases
- self._set_cookie(
- 'secure.dplay.%s' % domain_tld, 'dsc-geo',
- json.dumps({
- 'countryCode': domain_tld.upper(),
- 'expiry': (time.time() + 20 * 60) * 1000,
- }))
- stream = self._download_json(
- 'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
- % (domain_tld, video_id, protocol), video_id,
- 'Downloading %s stream JSON' % protocol, fatal=False)
- if stream and stream.get(protocol):
- extract_formats(protocol, stream[protocol])
-
- # As a last resort, try the direct unsigned hls/hds URLs from the info
- # dictionary. Sometimes this works even when the secure API with dsc-geo
- # has failed (e.g. http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
- if not formats:
- for protocol in PROTOCOLS:
- if info.get(protocol):
- extract_formats(protocol, info[protocol])
-
- self._sort_formats(formats)
-
- subtitles = {}
- for lang in ('se', 'sv', 'da', 'nl', 'no'):
- for format_id in ('web_vtt', 'vtt', 'srt'):
- subtitle_url = info.get('subtitles_%s_%s' % (lang, format_id))
- if subtitle_url:
- subtitles.setdefault(lang, []).append({'url': subtitle_url})
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': info.get('video_metadata_longDescription'),
- 'duration': int_or_none(info.get('video_metadata_length'), scale=1000),
- 'timestamp': int_or_none(info.get('video_publish_date')),
- 'creator': info.get('video_metadata_homeChannel'),
- 'series': info.get('video_metadata_show'),
- 'season_number': int_or_none(info.get('season')),
- 'episode_number': int_or_none(info.get('episode')),
- 'age_limit': int_or_none(info.get('minimum_age')),
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class DPlayItIE(InfoExtractor):
- _VALID_URL = r'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
- _GEO_COUNTRIES = ['IT']
- _TEST = {
- 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
- 'md5': '2b808ffb00fc47b884a172ca5d13053c',
- 'info_dict': {
- 'id': '6918',
- 'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
- 'ext': 'mp4',
- 'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
- 'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
- 'thumbnail': r're:^https?://.*\.jpe?g',
- 'upload_date': '20160524',
- 'series': 'Biografie imbarazzanti',
- 'season_number': 1,
- 'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
- 'episode_number': 1,
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- title = remove_end(self._og_search_title(webpage), ' | Dplay')
-
- video_id = None
-
- info = self._search_regex(
- r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
- webpage, 'playback JSON', default=None)
- if info:
- for _ in range(2):
- info = self._parse_json(info, display_id, fatal=False)
- if not info:
- break
- else:
- video_id = try_get(info, lambda x: x['data']['id'])
-
- if not info:
- info_url = self._search_regex(
- (r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
- r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
- webpage, 'info url', group='url')
-
- info_url = urljoin(url, info_url)
- video_id = info_url.rpartition('/')[-1]
-
- try:
- info = self._download_json(
- info_url, display_id, headers={
- 'Authorization': 'Bearer %s' % self._get_cookies(url).get(
- 'dplayit_token').value,
- 'Referer': url,
- })
- if isinstance(info, compat_str):
- info = self._parse_json(info, display_id)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
- info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
- error = info['errors'][0]
- if error.get('code') == 'access.denied.geoblocked':
- self.raise_geo_restricted(
- msg=error.get('detail'), countries=self._GEO_COUNTRIES)
- raise ExtractorError(info['errors'][0]['detail'], expected=True)
- raise
-
- hls_url = info['data']['attributes']['streaming']['hls']['url']
-
- formats = self._extract_m3u8_formats(
- hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- self._sort_formats(formats)
-
- series = self._html_search_regex(
- r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
- webpage, 'series', fatal=False)
- episode = self._search_regex(
- r'<p[^>]+class=["\'].*?\bdesc_ep\b.*?["\'][^>]*>\s*<br/>\s*<b>([^<]+)',
- webpage, 'episode', fatal=False)
-
- mobj = re.search(
- r'(?s)<span[^>]+class=["\']dates["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})',
- webpage)
- if mobj:
- season_number = int(mobj.group('season_number'))
- episode_number = int(mobj.group('episode_number'))
- upload_date = unified_strdate(mobj.group('upload_date'))
- else:
- season_number = episode_number = upload_date = None
-
- return {
- 'id': compat_str(video_id or display_id),
- 'display_id': display_id,
- 'title': title,
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'series': series,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- 'upload_date': upload_date,
- 'formats': formats,
- }
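
Note: the dsc-geo trick above is just a self-signed geo hint: a JSON cookie carrying the country code and a JavaScript-style millisecond expiry 20 minutes out. Restated as a helper:

    import json
    import time

    def dsc_geo_cookie(country_code):
        # JS-style timestamp in milliseconds, 20 minutes ahead, as the
        # player expects.
        return json.dumps({
            'countryCode': country_code.upper(),
            'expiry': (time.time() + 20 * 60) * 1000,
        })

    # self._set_cookie('secure.dplay.se', 'dsc-geo', dsc_geo_cookie('se'))
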
diff --git a/youtube_dl/extractor/dreisat.py b/youtube_dl/extractor/dreisat.py
deleted file mode 100644
index 848d387d1..000000000
--- a/youtube_dl/extractor/dreisat.py
+++ /dev/null
@@ -1,193 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_strdate,
- xpath_text,
- determine_ext,
- float_or_none,
- ExtractorError,
-)
-
-
-class DreiSatIE(InfoExtractor):
- IE_NAME = '3sat'
- _GEO_COUNTRIES = ['DE']
- _VALID_URL = r'https?://(?:www\.)?3sat\.de/mediathek/(?:(?:index|mediathek)\.php)?\?(?:(?:mode|display)=[^&]+&)*obj=(?P<id>[0-9]+)'
- _TESTS = [
- {
- 'url': 'http://www.3sat.de/mediathek/index.php?mode=play&obj=45918',
- 'md5': 'be37228896d30a88f315b638900a026e',
- 'info_dict': {
- 'id': '45918',
- 'ext': 'mp4',
- 'title': 'Waidmannsheil',
- 'description': 'md5:cce00ca1d70e21425e72c86a98a56817',
- 'uploader': 'SCHWEIZWEIT',
- 'uploader_id': '100000210',
- 'upload_date': '20140913'
- },
- 'params': {
- 'skip_download': True, # m3u8 downloads
- }
- },
- {
- 'url': 'http://www.3sat.de/mediathek/mediathek.php?mode=play&obj=51066',
- 'only_matching': True,
- },
- ]
-
- def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
- param_groups = {}
- for param_group in smil.findall(self._xpath_ns('./head/paramGroup', namespace)):
- group_id = param_group.get(self._xpath_ns(
- 'id', 'http://www.w3.org/XML/1998/namespace'))
- params = {}
- for param in param_group:
- params[param.get('name')] = param.get('value')
- param_groups[group_id] = params
-
- formats = []
- for video in smil.findall(self._xpath_ns('.//video', namespace)):
- src = video.get('src')
- if not src:
- continue
- bitrate = int_or_none(self._search_regex(r'_(\d+)k', src, 'bitrate', None)) or float_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
- group_id = video.get('paramGroup')
- param_group = param_groups[group_id]
- for proto in param_group['protocols'].split(','):
- formats.append({
- 'url': '%s://%s' % (proto, param_group['host']),
- 'app': param_group['app'],
- 'play_path': src,
- 'ext': 'flv',
- 'format_id': '%s-%d' % (proto, bitrate),
- 'tbr': bitrate,
- })
- self._sort_formats(formats)
- return formats
-
- def extract_from_xml_url(self, video_id, xml_url):
- doc = self._download_xml(
- xml_url, video_id,
- note='Downloading video info',
- errnote='Failed to download video info')
-
- status_code = xpath_text(doc, './status/statuscode')
- if status_code and status_code != 'ok':
- if status_code == 'notVisibleAnymore':
- message = 'Video %s is not available' % video_id
- else:
- message = '%s returned error: %s' % (self.IE_NAME, status_code)
- raise ExtractorError(message, expected=True)
-
- title = xpath_text(doc, './/information/title', 'title', True)
-
- urls = []
- formats = []
- for fnode in doc.findall('.//formitaeten/formitaet'):
- video_url = xpath_text(fnode, 'url')
- if not video_url or video_url in urls:
- continue
- urls.append(video_url)
-
- is_available = 'http://www.metafilegenerator' not in video_url
- geoloced = 'static_geoloced_online' in video_url
- if not is_available or geoloced:
- continue
-
- format_id = fnode.attrib['basetype']
- format_m = re.match(r'''(?x)
- (?P<vcodec>[^_]+)_(?P<acodec>[^_]+)_(?P<container>[^_]+)_
- (?P<proto>[^_]+)_(?P<index>[^_]+)_(?P<indexproto>[^_]+)
- ''', format_id)
-
- ext = determine_ext(video_url, None) or format_m.group('container')
-
- if ext == 'meta':
- continue
- elif ext == 'smil':
- formats.extend(self._extract_smil_formats(
- video_url, video_id, fatal=False))
- elif ext == 'm3u8':
- # the certificates are misconfigured (see
- # https://github.com/ytdl-org/youtube-dl/issues/8665)
- if video_url.startswith('https://'):
- continue
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id=format_id, fatal=False))
- else:
- quality = xpath_text(fnode, './quality')
- if quality:
- format_id += '-' + quality
-
- abr = int_or_none(xpath_text(fnode, './audioBitrate'), 1000)
- vbr = int_or_none(xpath_text(fnode, './videoBitrate'), 1000)
-
- tbr = int_or_none(self._search_regex(
- r'_(\d+)k', video_url, 'bitrate', None))
- if tbr and vbr and not abr:
- abr = tbr - vbr
-
- formats.append({
- 'format_id': format_id,
- 'url': video_url,
- 'ext': ext,
- 'acodec': format_m.group('acodec'),
- 'vcodec': format_m.group('vcodec'),
- 'abr': abr,
- 'vbr': vbr,
- 'tbr': tbr,
- 'width': int_or_none(xpath_text(fnode, './width')),
- 'height': int_or_none(xpath_text(fnode, './height')),
- 'filesize': int_or_none(xpath_text(fnode, './filesize')),
- 'protocol': format_m.group('proto').lower(),
- })
-
- geolocation = xpath_text(doc, './/details/geolocation')
- if not formats and geolocation and geolocation != 'none':
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
-
- self._sort_formats(formats)
-
- thumbnails = []
- for node in doc.findall('.//teaserimages/teaserimage'):
- thumbnail_url = node.text
- if not thumbnail_url:
- continue
- thumbnail = {
- 'url': thumbnail_url,
- }
- thumbnail_key = node.get('key')
- if thumbnail_key:
- m = re.match(r'^([0-9]+)x([0-9]+)$', thumbnail_key)
- if m:
- thumbnail['width'] = int(m.group(1))
- thumbnail['height'] = int(m.group(2))
- thumbnails.append(thumbnail)
-
- upload_date = unified_strdate(xpath_text(doc, './/details/airtime'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': xpath_text(doc, './/information/detail'),
- 'duration': int_or_none(xpath_text(doc, './/details/lengthSec')),
- 'thumbnails': thumbnails,
- 'uploader': xpath_text(doc, './/details/originChannelTitle'),
- 'uploader_id': xpath_text(doc, './/details/originChannelId'),
- 'upload_date': upload_date,
- 'formats': formats,
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- details_url = 'http://www.3sat.de/mediathek/xmlservice/web/beitragsDetails?id=%s' % video_id
- return self.extract_from_xml_url(video_id, details_url)
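
Note: 3sat encodes the total bitrate in the media URL as '_<kbps>k', so when the XML only carries a video bitrate, extract_from_xml_url above recovers the audio bitrate as tbr - vbr. A small sketch (the example URL is made up):

    import re

    def bitrates_from_url(video_url, abr=None, vbr=None):
        # Total bitrate comes from the '_<kbps>k' token in the URL, if any.
        m = re.search(r'_(\d+)k', video_url)
        tbr = int(m.group(1)) if m else None
        # With total and video bitrates known, the audio bitrate falls out.
        if tbr and vbr and not abr:
            abr = tbr - vbr
        return tbr, vbr, abr

    # bitrates_from_url('http://example.invalid/video_1456k.mp4', vbr=1256)
    # -> (1456, 1256, 200)
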
diff --git a/youtube_dl/extractor/dropbox.py b/youtube_dl/extractor/dropbox.py
deleted file mode 100644
index 14b6c00b0..000000000
--- a/youtube_dl/extractor/dropbox.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import os.path
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-from ..utils import url_basename
-
-
-class DropboxIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
- _TESTS = [
- {
- 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
- 'info_dict': {
- 'id': 'nelirfsxnmcfbfh',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video \'ä"BaW_jenozKc'
- }
- }, {
- 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
- 'only_matching': True,
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- fn = compat_urllib_parse_unquote(url_basename(url))
- title = os.path.splitext(fn)[0]
- video_url = re.sub(r'[?&]dl=0', '', url)
- video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': video_url,
- }
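
Note: the whole Dropbox extractor hinges on one URL rewrite: drop any dl=0 parameter and force dl=1 to get a direct download. As a standalone helper:

    import re

    def direct_dropbox_url(url):
        # Strip an existing dl=0 flag, then append dl=1 with the right
        # separator depending on whether a query string remains.
        url = re.sub(r'[?&]dl=0', '', url)
        return url + ('&' if '?' in url else '?') + 'dl=1'

    # direct_dropbox_url('https://www.dropbox.com/s/abc/clip.mp4?dl=0')
    # -> 'https://www.dropbox.com/s/abc/clip.mp4?dl=1'
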
diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py
deleted file mode 100644
index 218f10209..000000000
--- a/youtube_dl/extractor/drtv.py
+++ /dev/null
@@ -1,305 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import binascii
-import hashlib
-import re
-
-
-from .common import InfoExtractor
-from ..aes import aes_cbc_decrypt
-from ..compat import compat_urllib_parse_unquote
-from ..utils import (
- bytes_to_intlist,
- ExtractorError,
- int_or_none,
- intlist_to_bytes,
- float_or_none,
- mimetype2ext,
- str_or_none,
- unified_timestamp,
- update_url_query,
- url_or_none,
-)
-
-
-class DRTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
- _GEO_BYPASS = False
- _GEO_COUNTRIES = ['DK']
- IE_NAME = 'drtv'
- _TESTS = [{
- 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
- 'md5': '25e659cccc9a2ed956110a299fdf5983',
- 'info_dict': {
- 'id': 'klassen-darlig-taber-10',
- 'ext': 'mp4',
- 'title': 'Klassen - Dårlig taber (10)',
- 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
- 'timestamp': 1539085800,
- 'upload_date': '20181009',
- 'duration': 606.84,
- 'series': 'Klassen',
- 'season': 'Klassen I',
- 'season_number': 1,
- 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
- 'episode': 'Episode 10',
- 'episode_number': 10,
- 'release_year': 2016,
- },
- 'expected_warnings': ['Unable to download f4m manifest'],
- }, {
- # embed
- 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
- 'info_dict': {
- 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
- 'ext': 'mp4',
- 'title': 'christiania pusher street ryddes drdkrjpo',
- 'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
- 'timestamp': 1472800279,
- 'upload_date': '20160902',
- 'duration': 131.4,
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': ['Unable to download f4m manifest'],
- }, {
- # with SignLanguage formats
- 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
- 'info_dict': {
- 'id': 'historien-om-danmark-stenalder',
- 'ext': 'mp4',
- 'title': 'Historien om Danmark: Stenalder',
- 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
- 'timestamp': 1546628400,
- 'upload_date': '20190104',
- 'duration': 3502.56,
- 'formats': 'mincount:20',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- if '>Programmet er ikke længere tilgængeligt' in webpage:
- raise ExtractorError(
- 'Video %s is not available' % video_id, expected=True)
-
- video_id = self._search_regex(
- (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
- r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
- webpage, 'video id', default=None)
-
- if not video_id:
- video_id = compat_urllib_parse_unquote(self._search_regex(
- r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
- webpage, 'urn'))
-
- data = self._download_json(
- 'https://www.dr.dk/mu-online/api/1.4/programcard/%s' % video_id,
- video_id, 'Downloading video JSON', query={'expanded': 'true'})
-
- title = str_or_none(data.get('Title')) or re.sub(
- r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
- self._og_search_title(webpage))
- description = self._og_search_description(
- webpage, default=None) or data.get('Description')
-
- timestamp = unified_timestamp(
- data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
-
- thumbnail = None
- duration = None
-
- restricted_to_denmark = False
-
- formats = []
- subtitles = {}
-
- assets = []
- primary_asset = data.get('PrimaryAsset')
- if isinstance(primary_asset, dict):
- assets.append(primary_asset)
- secondary_assets = data.get('SecondaryAssets')
- if isinstance(secondary_assets, list):
- for secondary_asset in secondary_assets:
- if isinstance(secondary_asset, dict):
- assets.append(secondary_asset)
-
- def hex_to_bytes(hex):
- return binascii.a2b_hex(hex.encode('ascii'))
-
- def decrypt_uri(e):
- n = int(e[2:10], 16)
- a = e[10 + n:]
- data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
- key = bytes_to_intlist(hashlib.sha256(
- ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
- iv = bytes_to_intlist(hex_to_bytes(a))
- decrypted = aes_cbc_decrypt(data, key, iv)
- return intlist_to_bytes(
- decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
-
- for asset in assets:
- kind = asset.get('Kind')
- if kind == 'Image':
- thumbnail = url_or_none(asset.get('Uri'))
- elif kind in ('VideoResource', 'AudioResource'):
- duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
- restricted_to_denmark = asset.get('RestrictedToDenmark')
- asset_target = asset.get('Target')
- for link in asset.get('Links', []):
- uri = link.get('Uri')
- if not uri:
- encrypted_uri = link.get('EncryptedUri')
- if not encrypted_uri:
- continue
- try:
- uri = decrypt_uri(encrypted_uri)
- except Exception:
- self.report_warning(
- 'Unable to decrypt EncryptedUri', video_id)
- continue
- uri = url_or_none(uri)
- if not uri:
- continue
- target = link.get('Target')
- format_id = target or ''
- if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
- preference = -1
- format_id += '-%s' % asset_target
- elif asset_target == 'Default':
- preference = 1
- else:
- preference = None
- if target == 'HDS':
- f4m_formats = self._extract_f4m_formats(
- uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
- video_id, preference, f4m_id=format_id, fatal=False)
- if kind == 'AudioResource':
- for f in f4m_formats:
- f['vcodec'] = 'none'
- formats.extend(f4m_formats)
- elif target == 'HLS':
- formats.extend(self._extract_m3u8_formats(
- uri, video_id, 'mp4', entry_protocol='m3u8_native',
- preference=preference, m3u8_id=format_id,
- fatal=False))
- else:
- bitrate = link.get('Bitrate')
- if bitrate:
- format_id += '-%s' % bitrate
- formats.append({
- 'url': uri,
- 'format_id': format_id,
- 'tbr': int_or_none(bitrate),
- 'ext': link.get('FileFormat'),
- 'vcodec': 'none' if kind == 'AudioResource' else None,
- 'preference': preference,
- })
- subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
- if isinstance(subtitles_list, list):
- LANGS = {
- 'Danish': 'da',
- }
- for subs in subtitles_list:
- if not isinstance(subs, dict):
- continue
- sub_uri = url_or_none(subs.get('Uri'))
- if not sub_uri:
- continue
- lang = subs.get('Language') or 'da'
- subtitles.setdefault(LANGS.get(lang, lang), []).append({
- 'url': sub_uri,
- 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
- })
-
- if not formats and restricted_to_denmark:
- self.raise_geo_restricted(
- 'Unfortunately, DR is not allowed to show this program outside Denmark.',
- countries=self._GEO_COUNTRIES)
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- 'series': str_or_none(data.get('SeriesTitle')),
- 'season': str_or_none(data.get('SeasonTitle')),
- 'season_number': int_or_none(data.get('SeasonNumber')),
- 'season_id': str_or_none(data.get('SeasonUrn')),
- 'episode': str_or_none(data.get('EpisodeTitle')),
- 'episode_number': int_or_none(data.get('EpisodeNumber')),
- 'release_year': int_or_none(data.get('ProductionYear')),
- }
-
-
-class DRTVLiveIE(InfoExtractor):
- IE_NAME = 'drtv:live'
- _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
- _GEO_COUNTRIES = ['DK']
- _TEST = {
- 'url': 'https://www.dr.dk/tv/live/dr1',
- 'info_dict': {
- 'id': 'dr1',
- 'ext': 'mp4',
- 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- channel_id = self._match_id(url)
- channel_data = self._download_json(
- 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
- channel_id)
- title = self._live_title(channel_data['Title'])
-
- formats = []
- for streaming_server in channel_data.get('StreamingServers', []):
- server = streaming_server.get('Server')
- if not server:
- continue
- link_type = streaming_server.get('LinkType')
- for quality in streaming_server.get('Qualities', []):
- for stream in quality.get('Streams', []):
- stream_path = stream.get('Stream')
- if not stream_path:
- continue
- stream_url = update_url_query(
- '%s/%s' % (server, stream_path), {'b': ''})
- if link_type == 'HLS':
- formats.extend(self._extract_m3u8_formats(
- stream_url, channel_id, 'mp4',
- m3u8_id=link_type, fatal=False, live=True))
- elif link_type == 'HDS':
- formats.extend(self._extract_f4m_formats(update_url_query(
- '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}),
- channel_id, f4m_id=link_type, fatal=False))
- self._sort_formats(formats)
-
- return {
- 'id': channel_id,
- 'title': title,
- 'thumbnail': channel_data.get('PrimaryImageUri'),
- 'formats': formats,
- 'is_live': True,
- }
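
Note: decrypt_uri above unpacks a small container format: bytes 2-10 of the hex string give the ciphertext length, the ciphertext follows, and the trailing hex salt doubles as the CBC IV; the AES key is sha256('<salt>:<static secret>'). Pulled out as a standalone function, reusing youtube_dl's own helpers:

    import binascii
    import hashlib

    from youtube_dl.aes import aes_cbc_decrypt
    from youtube_dl.utils import bytes_to_intlist, intlist_to_bytes

    SECRET = 'sRBzYNXBzkKgnjj8pGtkACch'  # static string from decrypt_uri above

    def decrypt_uri(e):
        n = int(e[2:10], 16)            # hex ciphertext length
        salt = e[10 + n:]               # trailing hex salt, reused as IV
        data = bytes_to_intlist(binascii.a2b_hex(e[10:10 + n].encode('ascii')))
        key = bytes_to_intlist(hashlib.sha256(
            ('%s:%s' % (salt, SECRET)).encode('utf-8')).digest())
        iv = bytes_to_intlist(binascii.a2b_hex(salt.encode('ascii')))
        decrypted = aes_cbc_decrypt(data, key, iv)
        # Strip PKCS#7 padding, then any query string.
        return intlist_to_bytes(
            decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
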
diff --git a/youtube_dl/extractor/dumpert.py b/youtube_dl/extractor/dumpert.py
deleted file mode 100644
index be2e3d378..000000000
--- a/youtube_dl/extractor/dumpert.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_b64decode
-from ..utils import (
- qualities,
- sanitized_Request,
-)
-
-
-class DumpertIE(InfoExtractor):
- _VALID_URL = r'(?P<protocol>https?)://(?:www\.)?dumpert\.nl/(?:mediabase|embed)/(?P<id>[0-9]+/[0-9a-zA-Z]+)'
- _TESTS = [{
- 'url': 'http://www.dumpert.nl/mediabase/6646981/951bc60f/',
- 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
- 'info_dict': {
- 'id': '6646981/951bc60f',
- 'ext': 'mp4',
- 'title': 'Ik heb nieuws voor je',
- 'description': 'Niet schrikken hoor',
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }, {
- 'url': 'http://www.dumpert.nl/embed/6675421/dc440fe7/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- protocol = mobj.group('protocol')
-
- url = '%s://www.dumpert.nl/mediabase/%s' % (protocol, video_id)
- req = sanitized_Request(url)
- req.add_header('Cookie', 'nsfw=1; cpc=10')
- webpage = self._download_webpage(req, video_id)
-
- files_base64 = self._search_regex(
- r'data-files="([^"]+)"', webpage, 'data files')
-
- files = self._parse_json(
- compat_b64decode(files_base64).decode('utf-8'),
- video_id)
-
- quality = qualities(['flv', 'mobile', 'tablet', '720p'])
-
- formats = [{
- 'url': video_url,
- 'format_id': format_id,
- 'quality': quality(format_id),
- } for format_id, video_url in files.items() if format_id != 'still']
- self._sort_formats(formats)
-
- title = self._html_search_meta(
- 'title', webpage) or self._og_search_title(webpage)
- description = self._html_search_meta(
- 'description', webpage) or self._og_search_description(webpage)
- thumbnail = files.get('still') or self._og_search_thumbnail(webpage)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'formats': formats
- }
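
Note: qualities() from youtube_dl.utils turns a worst-to-best list of format ids into a ranking function, which is all the sorting above needs. A quick demonstration with a made-up files map shaped like the base64-decoded data-files attribute:

    from youtube_dl.utils import qualities

    # Ordered worst to best; unknown ids rank -1.
    quality = qualities(['flv', 'mobile', 'tablet', '720p'])

    files = {'mobile': 'http://example.invalid/m.mp4',
             '720p': 'http://example.invalid/hd.mp4',
             'still': 'http://example.invalid/thumb.jpg'}
    formats = sorted(
        ({'url': u, 'format_id': f, 'quality': quality(f)}
         for f, u in files.items() if f != 'still'),
        key=lambda f: f['quality'])
    # -> mobile (quality 1) sorts before 720p (quality 3)
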
diff --git a/youtube_dl/extractor/eighttracks.py b/youtube_dl/extractor/eighttracks.py
deleted file mode 100644
index 9a44f89f3..000000000
--- a/youtube_dl/extractor/eighttracks.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import random
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
-)
-from ..utils import (
- ExtractorError,
-)
-
-
-class EightTracksIE(InfoExtractor):
- IE_NAME = '8tracks'
- _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
- _TEST = {
- 'name': 'EightTracks',
- 'url': 'http://8tracks.com/ytdl/youtube-dl-test-tracks-a',
- 'info_dict': {
- 'id': '1336550',
- 'display_id': 'youtube-dl-test-tracks-a',
- 'description': "test chars: \"'/\\ä↭",
- 'title': "youtube-dl test tracks \"'/\\ä↭<>",
- },
- 'playlist': [
- {
- 'md5': '96ce57f24389fc8734ce47f4c1abcc55',
- 'info_dict': {
- 'id': '11885610',
- 'ext': 'm4a',
- 'title': "youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
- 'uploader_id': 'ytdl'
- }
- },
- {
- 'md5': '4ab26f05c1f7291ea460a3920be8021f',
- 'info_dict': {
- 'id': '11885608',
- 'ext': 'm4a',
- 'title': "youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
- 'uploader_id': 'ytdl'
- }
- },
- {
- 'md5': 'd30b5b5f74217410f4689605c35d1fd7',
- 'info_dict': {
- 'id': '11885679',
- 'ext': 'm4a',
- 'title': "youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
- 'uploader_id': 'ytdl'
- }
- },
- {
- 'md5': '4eb0a669317cd725f6bbd336a29f923a',
- 'info_dict': {
- 'id': '11885680',
- 'ext': 'm4a',
- 'title': "youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
- 'uploader_id': 'ytdl'
- }
- },
- {
- 'md5': '1893e872e263a2705558d1d319ad19e8',
- 'info_dict': {
- 'id': '11885682',
- 'ext': 'm4a',
- 'title': "PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
- 'uploader_id': 'ytdl'
- }
- },
- {
- 'md5': 'b673c46f47a216ab1741ae8836af5899',
- 'info_dict': {
- 'id': '11885683',
- 'ext': 'm4a',
- 'title': "PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
- 'uploader_id': 'ytdl'
- }
- },
- {
- 'md5': '1d74534e95df54986da7f5abf7d842b7',
- 'info_dict': {
- 'id': '11885684',
- 'ext': 'm4a',
- 'title': "phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
- 'uploader_id': 'ytdl'
- }
- },
- {
- 'md5': 'f081f47af8f6ae782ed131d38b9cd1c0',
- 'info_dict': {
- 'id': '11885685',
- 'ext': 'm4a',
- 'title': "phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
- 'uploader_id': 'ytdl'
- }
- }
- ]
- }
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- data = self._parse_json(
- self._search_regex(
- r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
- playlist_id)
-
- session = str(random.randint(0, 1000000000))
- mix_id = data['id']
- track_count = data['tracks_count']
- duration = data['duration']
- avg_song_duration = float(duration) / track_count
- # duration is sometimes negative, use predefined avg duration
- if avg_song_duration <= 0:
- avg_song_duration = 300
- first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
- next_url = first_url
- entries = []
-
- for i in range(track_count):
- api_json = None
- download_tries = 0
-
- while api_json is None:
- try:
- api_json = self._download_webpage(
- next_url, playlist_id,
- note='Downloading song information %d/%d' % (i + 1, track_count),
- errnote='Failed to download song information')
- except ExtractorError:
- if download_tries > 3:
- raise
- else:
- download_tries += 1
- self._sleep(avg_song_duration, playlist_id)
-
- api_data = json.loads(api_json)
- track_data = api_data['set']['track']
- info = {
- 'id': compat_str(track_data['id']),
- 'url': track_data['track_file_stream_url'],
- 'title': track_data['performer'] + ' - ' + track_data['name'],
- 'raw_title': track_data['name'],
- 'uploader_id': data['user']['login'],
- 'ext': 'm4a',
- }
- entries.append(info)
-
- next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
- session, mix_id, track_data['id'])
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'id': compat_str(mix_id),
- 'display_id': playlist_id,
- 'title': data.get('name'),
- 'description': data.get('description'),
- }
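
EightTracksIE has to walk the playlist one track at a time: the play API only reveals the next track after roughly one song length, so the extractor sleeps for the mix's average track duration between attempts and retries a bounded number of times before giving up. The retry shape, reduced to a generic sketch (`fetch` is a hypothetical stand-in for the _download_webpage call, and IOError stands in for ExtractorError):

import time

def fetch_with_retries(fetch, max_retries=3, delay=300):
    # Mirrors the loop above: on failure, sleep and retry until the
    # retry budget is spent, then let the last exception propagate.
    tries = 0
    while True:
        try:
            return fetch()
        except IOError:
            if tries > max_retries:
                raise
            tries += 1
            time.sleep(delay)
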
diff --git a/youtube_dl/extractor/eporner.py b/youtube_dl/extractor/eporner.py
deleted file mode 100644
index c050bf9df..000000000
--- a/youtube_dl/extractor/eporner.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- encode_base_n,
- ExtractorError,
- int_or_none,
- merge_dicts,
- parse_duration,
- str_to_int,
- url_or_none,
-)
-
-
-class EpornerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
- _TESTS = [{
- 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
- 'md5': '39d486f046212d8e1b911c52ab4691f8',
- 'info_dict': {
- 'id': 'qlDUmNsj6VS',
- 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
- 'ext': 'mp4',
- 'title': 'Infamous Tiffany Teen Strip Tease Video',
- 'description': 'md5:764f39abf932daafa37485eb46efa152',
- 'timestamp': 1232520922,
- 'upload_date': '20090121',
- 'duration': 1838,
- 'view_count': int,
- 'age_limit': 18,
- },
- 'params': {
- 'proxy': '127.0.0.1:8118'
- }
- }, {
- # New (May 2016) URL layout
- 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
- 'only_matching': True,
- }, {
- 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id') or video_id
-
- webpage, urlh = self._download_webpage_handle(url, display_id)
-
- video_id = self._match_id(compat_str(urlh.geturl()))
-
- hash = self._search_regex(
- r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
-
- title = self._og_search_title(webpage, default=None) or self._html_search_regex(
- r'<title>(.+?) - EPORNER', webpage, 'title')
-
- # Reverse engineered from vjs.js
- def calc_hash(s):
- return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
-
- video = self._download_json(
- 'http://www.eporner.com/xhr/video/%s' % video_id,
- display_id, note='Downloading video JSON',
- query={
- 'hash': calc_hash(hash),
- 'device': 'generic',
- 'domain': 'www.eporner.com',
- 'fallback': 'false',
- })
-
- if video.get('available') is False:
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, video['message']), expected=True)
-
- sources = video['sources']
-
- formats = []
- for kind, formats_dict in sources.items():
- if not isinstance(formats_dict, dict):
- continue
- for format_id, format_dict in formats_dict.items():
- if not isinstance(format_dict, dict):
- continue
- src = url_or_none(format_dict.get('src'))
- if not src or not src.startswith('http'):
- continue
- if kind == 'hls':
- formats.extend(self._extract_m3u8_formats(
- src, display_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id=kind, fatal=False))
- else:
- height = int_or_none(self._search_regex(
- r'(\d+)[pP]', format_id, 'height', default=None))
- fps = int_or_none(self._search_regex(
- r'(\d+)fps', format_id, 'fps', default=None))
-
- formats.append({
- 'url': src,
- 'format_id': format_id,
- 'height': height,
- 'fps': fps,
- })
- self._sort_formats(formats)
-
- json_ld = self._search_json_ld(webpage, display_id, default={})
-
- duration = parse_duration(self._html_search_meta(
- 'duration', webpage, default=None))
- view_count = str_to_int(self._search_regex(
- r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
- webpage, 'view count', fatal=False))
-
- return merge_dicts(json_ld, {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'duration': duration,
- 'view_count': view_count,
- 'formats': formats,
- 'age_limit': 18,
- })
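
The interesting piece in the removed EpornerIE is calc_hash, which turns the 32-hex-digit page hash into the token the JSON endpoint expects: each of the four 8-digit chunks is parsed as an integer and re-encoded in base 36. A standalone sketch with a minimal replacement for youtube_dl.utils.encode_base_n (a digits-then-lowercase table is assumed here):

def encode_base_n(num, n, table='0123456789abcdefghijklmnopqrstuvwxyz'):
    # Minimal stand-in: repeated divmod, most significant digit first.
    if num == 0:
        return table[0]
    out = ''
    while num:
        num, rem = divmod(num, n)
        out = table[rem] + out
    return out

def calc_hash(s):
    # Four 8-hex-digit chunks, each re-encoded in base 36 and joined.
    return ''.join(
        encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8))

token = calc_hash('00000000ffffffff0123456789abcdef')  # placeholder hash
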
diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py
deleted file mode 100644
index 7a1e0dad6..000000000
--- a/youtube_dl/extractor/extractors.py
+++ /dev/null
@@ -1,1520 +0,0 @@
-# flake8: noqa
-from __future__ import unicode_literals
-
-from .abc import (
- ABCIE,
- ABCIViewIE,
-)
-from .abcnews import (
- AbcNewsIE,
- AbcNewsVideoIE,
-)
-from .abcotvs import (
- ABCOTVSIE,
- ABCOTVSClipsIE,
-)
-from .academicearth import AcademicEarthCourseIE
-from .acast import (
- ACastIE,
- ACastChannelIE,
-)
-from .addanime import AddAnimeIE
-from .adn import ADNIE
-from .adobeconnect import AdobeConnectIE
-from .adobetv import (
- AdobeTVIE,
- AdobeTVShowIE,
- AdobeTVChannelIE,
- AdobeTVVideoIE,
-)
-from .adultswim import AdultSwimIE
-from .aenetworks import (
- AENetworksIE,
- HistoryTopicIE,
-)
-from .afreecatv import AfreecaTVIE
-from .airmozilla import AirMozillaIE
-from .aljazeera import AlJazeeraIE
-from .alphaporno import AlphaPornoIE
-from .amcnetworks import AMCNetworksIE
-from .americastestkitchen import AmericasTestKitchenIE
-from .animeondemand import AnimeOnDemandIE
-from .anvato import AnvatoIE
-from .aol import AolIE
-from .allocine import AllocineIE
-from .aliexpress import AliExpressLiveIE
-from .apa import APAIE
-from .aparat import AparatIE
-from .appleconnect import AppleConnectIE
-from .appletrailers import (
- AppleTrailersIE,
- AppleTrailersSectionIE,
-)
-from .archiveorg import ArchiveOrgIE
-from .arkena import ArkenaIE
-from .ard import (
- ARDBetaMediathekIE,
- ARDIE,
- ARDMediathekIE,
-)
-from .arte import (
- ArteTVPlus7IE,
- ArteTVEmbedIE,
- ArteTVPlaylistIE,
-)
-from .asiancrush import (
- AsianCrushIE,
- AsianCrushPlaylistIE,
-)
-from .atresplayer import AtresPlayerIE
-from .atttechchannel import ATTTechChannelIE
-from .atvat import ATVAtIE
-from .audimedia import AudiMediaIE
-from .audioboom import AudioBoomIE
-from .audiomack import AudiomackIE, AudiomackAlbumIE
-from .awaan import (
- AWAANIE,
- AWAANVideoIE,
- AWAANLiveIE,
- AWAANSeasonIE,
-)
-from .azmedien import AZMedienIE
-from .baidu import BaiduVideoIE
-from .bambuser import BambuserIE, BambuserChannelIE
-from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
-from .bbc import (
- BBCCoUkIE,
- BBCCoUkArticleIE,
- BBCCoUkIPlayerPlaylistIE,
- BBCCoUkPlaylistIE,
- BBCIE,
-)
-from .beampro import (
- BeamProLiveIE,
- BeamProVodIE,
-)
-from .beeg import BeegIE
-from .behindkink import BehindKinkIE
-from .bellmedia import BellMediaIE
-from .beatport import BeatportIE
-from .bet import BetIE
-from .bfi import BFIPlayerIE
-from .bigflix import BigflixIE
-from .bild import BildIE
-from .bilibili import (
- BiliBiliIE,
- BiliBiliBangumiIE,
- BilibiliAudioIE,
- BilibiliAudioAlbumIE,
-)
-from .biobiochiletv import BioBioChileTVIE
-from .bitchute import (
- BitChuteIE,
- BitChuteChannelIE,
-)
-from .biqle import BIQLEIE
-from .bleacherreport import (
- BleacherReportIE,
- BleacherReportCMSIE,
-)
-from .blinkx import BlinkxIE
-from .bloomberg import BloombergIE
-from .bokecc import BokeCCIE
-from .bostonglobe import BostonGlobeIE
-from .bpb import BpbIE
-from .br import (
- BRIE,
- BRMediathekIE,
-)
-from .bravotv import BravoTVIE
-from .breakcom import BreakIE
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .businessinsider import BusinessInsiderIE
-from .buzzfeed import BuzzFeedIE
-from .byutv import BYUtvIE
-from .c56 import C56IE
-from .camdemy import (
- CamdemyIE,
- CamdemyFolderIE
-)
-from .cammodels import CamModelsIE
-from .camtube import CamTubeIE
-from .camwithher import CamWithHerIE
-from .canalplus import CanalplusIE
-from .canalc2 import Canalc2IE
-from .canvas import (
- CanvasIE,
- CanvasEenIE,
- VrtNUIE,
-)
-from .carambatv import (
- CarambaTVIE,
- CarambaTVPageIE,
-)
-from .cartoonnetwork import CartoonNetworkIE
-from .cbc import (
- CBCIE,
- CBCPlayerIE,
- CBCWatchVideoIE,
- CBCWatchIE,
- CBCOlympicsIE,
-)
-from .cbs import CBSIE
-from .cbslocal import CBSLocalIE
-from .cbsinteractive import CBSInteractiveIE
-from .cbsnews import (
- CBSNewsEmbedIE,
- CBSNewsIE,
- CBSNewsLiveVideoIE,
-)
-from .cbssports import CBSSportsIE
-from .ccc import (
- CCCIE,
- CCCPlaylistIE,
-)
-from .ccma import CCMAIE
-from .cctv import CCTVIE
-from .cda import CDAIE
-from .ceskatelevize import (
- CeskaTelevizeIE,
- CeskaTelevizePoradyIE,
-)
-from .channel9 import Channel9IE
-from .charlierose import CharlieRoseIE
-from .chaturbate import ChaturbateIE
-from .chilloutzone import ChilloutzoneIE
-from .chirbit import (
- ChirbitIE,
- ChirbitProfileIE,
-)
-from .cinchcast import CinchcastIE
-from .cinemax import CinemaxIE
-from .ciscolive import (
- CiscoLiveSessionIE,
- CiscoLiveSearchIE,
-)
-from .cjsw import CJSWIE
-from .cliphunter import CliphunterIE
-from .clippit import ClippitIE
-from .cliprs import ClipRsIE
-from .clipsyndicate import ClipsyndicateIE
-from .closertotruth import CloserToTruthIE
-from .cloudflarestream import CloudflareStreamIE
-from .cloudy import CloudyIE
-from .clubic import ClubicIE
-from .clyp import ClypIE
-from .cmt import CMTIE
-from .cnbc import (
- CNBCIE,
- CNBCVideoIE,
-)
-from .cnn import (
- CNNIE,
- CNNBlogsIE,
- CNNArticleIE,
-)
-from .coub import CoubIE
-from .comedycentral import (
- ComedyCentralFullEpisodesIE,
- ComedyCentralIE,
- ComedyCentralShortnameIE,
- ComedyCentralTVIE,
- ToshIE,
-)
-from .comcarcoff import ComCarCoffIE
-from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
-from .commonprotocols import (
- MmsIE,
- RtmpIE,
-)
-from .condenast import CondeNastIE
-from .corus import CorusIE
-from .cracked import CrackedIE
-from .crackle import CrackleIE
-from .crooksandliars import CrooksAndLiarsIE
-from .crunchyroll import (
- CrunchyrollIE,
- CrunchyrollShowPlaylistIE
-)
-from .cspan import CSpanIE
-from .ctsnews import CtsNewsIE
-from .ctvnews import CTVNewsIE
-from .cultureunplugged import CultureUnpluggedIE
-from .curiositystream import (
- CuriosityStreamIE,
- CuriosityStreamCollectionIE,
-)
-from .cwtv import CWTVIE
-from .dailymail import DailyMailIE
-from .dailymotion import (
- DailymotionIE,
- DailymotionPlaylistIE,
- DailymotionUserIE,
-)
-from .daisuki import (
- DaisukiMottoIE,
- DaisukiMottoPlaylistIE,
-)
-from .daum import (
- DaumIE,
- DaumClipIE,
- DaumPlaylistIE,
- DaumUserIE,
-)
-from .dbtv import DBTVIE
-from .dctp import DctpTvIE
-from .deezer import DeezerPlaylistIE
-from .democracynow import DemocracynowIE
-from .dfb import DFBIE
-from .dhm import DHMIE
-from .digg import DiggIE
-from .dotsub import DotsubIE
-from .douyutv import (
- DouyuShowIE,
- DouyuTVIE,
-)
-from .dplay import (
- DPlayIE,
- DPlayItIE,
-)
-from .dreisat import DreiSatIE
-from .drbonanza import DRBonanzaIE
-from .drtuber import DrTuberIE
-from .drtv import (
- DRTVIE,
- DRTVLiveIE,
-)
-from .dtube import DTubeIE
-from .dvtv import DVTVIE
-from .dumpert import DumpertIE
-from .defense import DefenseGouvFrIE
-from .discovery import DiscoveryIE
-from .discoverygo import (
- DiscoveryGoIE,
- DiscoveryGoPlaylistIE,
-)
-from .discoverynetworks import DiscoveryNetworksDeIE
-from .discoveryvr import DiscoveryVRIE
-from .disney import DisneyIE
-from .dispeak import DigitallySpeakingIE
-from .dropbox import DropboxIE
-from .dw import (
- DWIE,
- DWArticleIE,
-)
-from .eagleplatform import EaglePlatformIE
-from .ebaumsworld import EbaumsWorldIE
-from .echomsk import EchoMskIE
-from .egghead import (
- EggheadCourseIE,
- EggheadLessonIE,
-)
-from .ehow import EHowIE
-from .eighttracks import EightTracksIE
-from .einthusan import EinthusanIE
-from .eitb import EitbIE
-from .ellentube import (
- EllenTubeIE,
- EllenTubeVideoIE,
- EllenTubePlaylistIE,
-)
-from .elpais import ElPaisIE
-from .embedly import EmbedlyIE
-from .engadget import EngadgetIE
-from .eporner import EpornerIE
-from .eroprofile import EroProfileIE
-from .escapist import EscapistIE
-from .espn import (
- ESPNIE,
- ESPNArticleIE,
- FiveThirtyEightIE,
-)
-from .esri import EsriVideoIE
-from .europa import EuropaIE
-from .everyonesmixtape import EveryonesMixtapeIE
-from .expotv import ExpoTVIE
-from .expressen import ExpressenIE
-from .extremetube import ExtremeTubeIE
-from .eyedotv import EyedoTVIE
-from .facebook import (
- FacebookIE,
- FacebookPluginsVideoIE,
-)
-from .faz import FazIE
-from .fc2 import (
- FC2IE,
- FC2EmbedIE,
-)
-from .fczenit import FczenitIE
-from .filmon import (
- FilmOnIE,
- FilmOnChannelIE,
-)
-from .filmweb import FilmwebIE
-from .firsttv import FirstTVIE
-from .fivemin import FiveMinIE
-from .fivetv import FiveTVIE
-from .flickr import FlickrIE
-from .flipagram import FlipagramIE
-from .folketinget import FolketingetIE
-from .footyroom import FootyRoomIE
-from .formula1 import Formula1IE
-from .fourtube import (
- FourTubeIE,
- PornTubeIE,
- PornerBrosIE,
- FuxIE,
-)
-from .fox import FOXIE
-from .fox9 import FOX9IE
-from .foxgay import FoxgayIE
-from .foxnews import (
- FoxNewsIE,
- FoxNewsArticleIE,
-)
-from .foxsports import FoxSportsIE
-from .franceculture import FranceCultureIE
-from .franceinter import FranceInterIE
-from .francetv import (
- FranceTVIE,
- FranceTVSiteIE,
- FranceTVEmbedIE,
- FranceTVInfoIE,
- FranceTVInfoSportIE,
- FranceTVJeunesseIE,
- GenerationWhatIE,
- CultureboxIE,
-)
-from .freesound import FreesoundIE
-from .freespeech import FreespeechIE
-from .freshlive import FreshLiveIE
-from .frontendmasters import (
- FrontendMastersIE,
- FrontendMastersLessonIE,
- FrontendMastersCourseIE
-)
-from .funimation import FunimationIE
-from .funk import FunkIE
-from .fusion import FusionIE
-from .fxnetworks import FXNetworksIE
-from .gaia import GaiaIE
-from .gameinformer import GameInformerIE
-from .gameone import (
- GameOneIE,
- GameOnePlaylistIE,
-)
-from .gamespot import GameSpotIE
-from .gamestar import GameStarIE
-from .gaskrank import GaskrankIE
-from .gazeta import GazetaIE
-from .gdcvault import GDCVaultIE
-from .generic import GenericIE
-from .gfycat import GfycatIE
-from .giantbomb import GiantBombIE
-from .giga import GigaIE
-from .glide import GlideIE
-from .globo import (
- GloboIE,
- GloboArticleIE,
-)
-from .go import GoIE
-from .go90 import Go90IE
-from .godtube import GodTubeIE
-from .golem import GolemIE
-from .googledrive import GoogleDriveIE
-from .googleplus import GooglePlusIE
-from .googlesearch import GoogleSearchIE
-from .goshgay import GoshgayIE
-from .gputechconf import GPUTechConfIE
-from .groupon import GrouponIE
-from .hark import HarkIE
-from .hbo import HBOIE
-from .hearthisat import HearThisAtIE
-from .heise import HeiseIE
-from .hellporno import HellPornoIE
-from .helsinki import HelsinkiIE
-from .hentaistigma import HentaiStigmaIE
-from .hgtv import HGTVComShowIE
-from .hketv import HKETVIE
-from .hidive import HiDiveIE
-from .historicfilms import HistoricFilmsIE
-from .hitbox import HitboxIE, HitboxLiveIE
-from .hitrecord import HitRecordIE
-from .hornbunny import HornBunnyIE
-from .hotnewhiphop import HotNewHipHopIE
-from .hotstar import (
- HotStarIE,
- HotStarPlaylistIE,
-)
-from .howcast import HowcastIE
-from .howstuffworks import HowStuffWorksIE
-from .hrti import (
- HRTiIE,
- HRTiPlaylistIE,
-)
-from .huajiao import HuajiaoIE
-from .huffpost import HuffPostIE
-from .hungama import (
- HungamaIE,
- HungamaSongIE,
-)
-from .hypem import HypemIE
-from .iconosquare import IconosquareIE
-from .ign import (
- IGNIE,
- OneUPIE,
- PCMagIE,
-)
-from .imdb import (
- ImdbIE,
- ImdbListIE
-)
-from .imgur import (
- ImgurIE,
- ImgurAlbumIE,
- ImgurGalleryIE,
-)
-from .ina import InaIE
-from .inc import IncIE
-from .indavideo import IndavideoEmbedIE
-from .infoq import InfoQIE
-from .instagram import (
- InstagramIE,
- InstagramUserIE,
- InstagramTagIE,
-)
-from .internazionale import InternazionaleIE
-from .internetvideoarchive import InternetVideoArchiveIE
-from .iprima import IPrimaIE
-from .iqiyi import IqiyiIE
-from .ir90tv import Ir90TvIE
-from .itv import (
- ITVIE,
- ITVBTCCIE,
-)
-from .ivi import (
- IviIE,
- IviCompilationIE
-)
-from .ivideon import IvideonIE
-from .iwara import IwaraIE
-from .izlesene import IzleseneIE
-from .jamendo import (
- JamendoIE,
- JamendoAlbumIE,
-)
-from .jeuxvideo import JeuxVideoIE
-from .jove import JoveIE
-from .joj import JojIE
-from .jwplatform import JWPlatformIE
-from .jpopsukitv import JpopsukiIE
-from .kakao import KakaoIE
-from .kaltura import KalturaIE
-from .kanalplay import KanalPlayIE
-from .kankan import KankanIE
-from .karaoketv import KaraoketvIE
-from .karrierevideos import KarriereVideosIE
-from .keezmovies import KeezMoviesIE
-from .ketnet import KetnetIE
-from .khanacademy import KhanAcademyIE
-from .kickstarter import KickStarterIE
-from .kinopoisk import KinoPoiskIE
-from .keek import KeekIE
-from .konserthusetplay import KonserthusetPlayIE
-from .kontrtube import KontrTubeIE
-from .krasview import KrasViewIE
-from .ku6 import Ku6IE
-from .kusi import KUSIIE
-from .kuwo import (
- KuwoIE,
- KuwoAlbumIE,
- KuwoChartIE,
- KuwoSingerIE,
- KuwoCategoryIE,
- KuwoMvIE,
-)
-from .la7 import LA7IE
-from .laola1tv import (
- Laola1TvEmbedIE,
- Laola1TvIE,
- EHFTVIE,
- ITTFIE,
-)
-from .lci import LCIIE
-from .lcp import (
- LcpPlayIE,
- LcpIE,
-)
-from .learnr import LearnrIE
-from .lecture2go import Lecture2GoIE
-from .lecturio import (
- LecturioIE,
- LecturioCourseIE,
- LecturioDeCourseIE,
-)
-from .leeco import (
- LeIE,
- LePlaylistIE,
- LetvCloudIE,
-)
-from .lego import LEGOIE
-from .lemonde import LemondeIE
-from .lenta import LentaIE
-from .libraryofcongress import LibraryOfCongressIE
-from .libsyn import LibsynIE
-from .lifenews import (
- LifeNewsIE,
- LifeEmbedIE,
-)
-from .limelight import (
- LimelightMediaIE,
- LimelightChannelIE,
- LimelightChannelListIE,
-)
-from .line import LineTVIE
-from .linkedin import (
- LinkedInLearningIE,
- LinkedInLearningCourseIE,
-)
-from .linuxacademy import LinuxAcademyIE
-from .litv import LiTVIE
-from .livejournal import LiveJournalIE
-from .liveleak import (
- LiveLeakIE,
- LiveLeakEmbedIE,
-)
-from .livestream import (
- LivestreamIE,
- LivestreamOriginalIE,
- LivestreamShortenerIE,
-)
-from .lnkgo import LnkGoIE
-from .localnews8 import LocalNews8IE
-from .lovehomeporn import LoveHomePornIE
-from .lrt import LRTIE
-from .lynda import (
- LyndaIE,
- LyndaCourseIE
-)
-from .m6 import M6IE
-from .macgamestore import MacGameStoreIE
-from .mailru import (
- MailRuIE,
- MailRuMusicIE,
- MailRuMusicSearchIE,
-)
-from .makertv import MakerTVIE
-from .malltv import MallTVIE
-from .mangomolo import (
- MangomoloVideoIE,
- MangomoloLiveIE,
-)
-from .manyvids import ManyVidsIE
-from .markiza import (
- MarkizaIE,
- MarkizaPageIE,
-)
-from .massengeschmacktv import MassengeschmackTVIE
-from .matchtv import MatchTVIE
-from .mdr import MDRIE
-from .mediaset import MediasetIE
-from .mediasite import (
- MediasiteIE,
- MediasiteCatalogIE,
- MediasiteNamedCatalogIE,
-)
-from .medici import MediciIE
-from .megaphone import MegaphoneIE
-from .meipai import MeipaiIE
-from .melonvod import MelonVODIE
-from .meta import METAIE
-from .metacafe import MetacafeIE
-from .metacritic import MetacriticIE
-from .mgoon import MgoonIE
-from .mgtv import MGTVIE
-from .miaopai import MiaoPaiIE
-from .microsoftvirtualacademy import (
- MicrosoftVirtualAcademyIE,
- MicrosoftVirtualAcademyCourseIE,
-)
-from .minhateca import MinhatecaIE
-from .ministrygrid import MinistryGridIE
-from .minoto import MinotoIE
-from .miomio import MioMioIE
-from .mit import TechTVMITIE, MITIE, OCWMITIE
-from .mitele import MiTeleIE
-from .mixcloud import (
- MixcloudIE,
- MixcloudUserIE,
- MixcloudPlaylistIE,
- MixcloudStreamIE,
-)
-from .mlb import MLBIE
-from .mnet import MnetIE
-from .moevideo import MoeVideoIE
-from .mofosex import MofosexIE
-from .mojvideo import MojvideoIE
-from .morningstar import MorningstarIE
-from .motherless import (
- MotherlessIE,
- MotherlessGroupIE
-)
-from .motorsport import MotorsportIE
-from .movieclips import MovieClipsIE
-from .moviezine import MoviezineIE
-from .movingimage import MovingImageIE
-from .msn import MSNIE
-from .mtv import (
- MTVIE,
- MTVVideoIE,
- MTVServicesEmbeddedIE,
- MTVDEIE,
- MTV81IE,
-)
-from .muenchentv import MuenchenTVIE
-from .musicplayon import MusicPlayOnIE
-from .mwave import MwaveIE, MwaveMeetGreetIE
-from .mychannels import MyChannelsIE
-from .myspace import MySpaceIE, MySpaceAlbumIE
-from .myspass import MySpassIE
-from .myvi import (
- MyviIE,
- MyviEmbedIE,
-)
-from .myvidster import MyVidsterIE
-from .nationalgeographic import (
- NationalGeographicVideoIE,
- NationalGeographicTVIE,
-)
-from .naver import NaverIE
-from .nba import NBAIE
-from .nbc import (
- CSNNEIE,
- NBCIE,
- NBCNewsIE,
- NBCOlympicsIE,
- NBCOlympicsStreamIE,
- NBCSportsIE,
- NBCSportsStreamIE,
- NBCSportsVPlayerIE,
-)
-from .ndr import (
- NDRIE,
- NJoyIE,
- NDREmbedBaseIE,
- NDREmbedIE,
- NJoyEmbedIE,
-)
-from .ndtv import NDTVIE
-from .netzkino import NetzkinoIE
-from .nerdcubed import NerdCubedFeedIE
-from .neteasemusic import (
- NetEaseMusicIE,
- NetEaseMusicAlbumIE,
- NetEaseMusicSingerIE,
- NetEaseMusicListIE,
- NetEaseMusicMvIE,
- NetEaseMusicProgramIE,
- NetEaseMusicDjRadioIE,
-)
-from .newgrounds import (
- NewgroundsIE,
- NewgroundsPlaylistIE,
-)
-from .newstube import NewstubeIE
-from .nextmedia import (
- NextMediaIE,
- NextMediaActionNewsIE,
- AppleDailyIE,
- NextTVIE,
-)
-from .nexx import (
- NexxIE,
- NexxEmbedIE,
-)
-from .nfl import NFLIE
-from .nhk import NhkVodIE
-from .nhl import NHLIE
-from .nick import (
- NickIE,
- NickBrIE,
- NickDeIE,
- NickNightIE,
- NickRuIE,
-)
-from .niconico import NiconicoIE, NiconicoPlaylistIE
-from .ninecninemedia import NineCNineMediaIE
-from .ninegag import NineGagIE
-from .ninenow import NineNowIE
-from .nintendo import NintendoIE
-from .njpwworld import NJPWWorldIE
-from .nobelprize import NobelPrizeIE
-from .noco import NocoIE
-from .nonktube import NonkTubeIE
-from .noovo import NoovoIE
-from .normalboots import NormalbootsIE
-from .nosvideo import NosVideoIE
-from .nova import (
- NovaEmbedIE,
- NovaIE,
-)
-from .nowness import (
- NownessIE,
- NownessPlaylistIE,
- NownessSeriesIE,
-)
-from .noz import NozIE
-from .npo import (
- AndereTijdenIE,
- NPOIE,
- NPOLiveIE,
- NPORadioIE,
- NPORadioFragmentIE,
- SchoolTVIE,
- HetKlokhuisIE,
- VPROIE,
- WNLIE,
-)
-from .npr import NprIE
-from .nrk import (
- NRKIE,
- NRKPlaylistIE,
- NRKSkoleIE,
- NRKTVIE,
- NRKTVDirekteIE,
- NRKTVEpisodeIE,
- NRKTVEpisodesIE,
- NRKTVSeasonIE,
- NRKTVSeriesIE,
-)
-from .nrl import NRLTVIE
-from .ntvcojp import NTVCoJpCUIE
-from .ntvde import NTVDeIE
-from .ntvru import NTVRuIE
-from .nytimes import (
- NYTimesIE,
- NYTimesArticleIE,
-)
-from .nuvid import NuvidIE
-from .nzz import NZZIE
-from .odatv import OdaTVIE
-from .odnoklassniki import OdnoklassnikiIE
-from .oktoberfesttv import OktoberfestTVIE
-from .ondemandkorea import OnDemandKoreaIE
-from .onet import (
- OnetIE,
- OnetChannelIE,
- OnetMVPIE,
- OnetPlIE,
-)
-from .onionstudios import OnionStudiosIE
-from .ooyala import (
- OoyalaIE,
- OoyalaExternalIE,
-)
-from .openload import (
- OpenloadIE,
- VerystreamIE,
-)
-from .ora import OraTVIE
-from .orf import (
- ORFTVthekIE,
- ORFFM4IE,
- ORFFM4StoryIE,
- ORFOE1IE,
- ORFIPTVIE,
-)
-from .outsidetv import OutsideTVIE
-from .packtpub import (
- PacktPubIE,
- PacktPubCourseIE,
-)
-from .pandatv import PandaTVIE
-from .pandoratv import PandoraTVIE
-from .parliamentliveuk import ParliamentLiveUKIE
-from .patreon import PatreonIE
-from .pbs import PBSIE
-from .pearvideo import PearVideoIE
-from .peertube import PeerTubeIE
-from .people import PeopleIE
-from .performgroup import PerformGroupIE
-from .periscope import (
- PeriscopeIE,
- PeriscopeUserIE,
-)
-from .philharmoniedeparis import PhilharmonieDeParisIE
-from .phoenix import PhoenixIE
-from .photobucket import PhotobucketIE
-from .picarto import (
- PicartoIE,
- PicartoVodIE,
-)
-from .piksel import PikselIE
-from .pinkbike import PinkbikeIE
-from .pladform import PladformIE
-from .platzi import (
- PlatziIE,
- PlatziCourseIE,
-)
-from .playfm import PlayFMIE
-from .playplustv import PlayPlusTVIE
-from .plays import PlaysTVIE
-from .playtvak import PlaytvakIE
-from .playvid import PlayvidIE
-from .playwire import PlaywireIE
-from .pluralsight import (
- PluralsightIE,
- PluralsightCourseIE,
-)
-from .podomatic import PodomaticIE
-from .pokemon import PokemonIE
-from .polskieradio import (
- PolskieRadioIE,
- PolskieRadioCategoryIE,
-)
-from .popcorntv import PopcornTVIE
-from .porn91 import Porn91IE
-from .porncom import PornComIE
-from .pornhd import PornHdIE
-from .pornhub import (
- PornHubIE,
- PornHubUserIE,
- PornHubPagedVideoListIE,
- PornHubUserVideosUploadIE,
-)
-from .pornotube import PornotubeIE
-from .pornovoisines import PornoVoisinesIE
-from .pornoxo import PornoXOIE
-from .puhutv import (
- PuhuTVIE,
- PuhuTVSerieIE,
-)
-from .presstv import PressTVIE
-from .prosiebensat1 import ProSiebenSat1IE
-from .puls4 import Puls4IE
-from .pyvideo import PyvideoIE
-from .qqmusic import (
- QQMusicIE,
- QQMusicSingerIE,
- QQMusicAlbumIE,
- QQMusicToplistIE,
- QQMusicPlaylistIE,
-)
-from .r7 import (
- R7IE,
- R7ArticleIE,
-)
-from .radiocanada import (
- RadioCanadaIE,
- RadioCanadaAudioVideoIE,
-)
-from .radiode import RadioDeIE
-from .radiojavan import RadioJavanIE
-from .radiobremen import RadioBremenIE
-from .radiofrance import RadioFranceIE
-from .rai import (
- RaiPlayIE,
- RaiPlayLiveIE,
- RaiPlayPlaylistIE,
- RaiIE,
-)
-from .raywenderlich import (
- RayWenderlichIE,
- RayWenderlichCourseIE,
-)
-from .rbmaradio import RBMARadioIE
-from .rds import RDSIE
-from .redbulltv import (
- RedBullTVIE,
- RedBullTVRrnContentIE,
-)
-from .reddit import (
- RedditIE,
- RedditRIE,
-)
-from .redtube import RedTubeIE
-from .regiotv import RegioTVIE
-from .rentv import (
- RENTVIE,
- RENTVArticleIE,
-)
-from .restudy import RestudyIE
-from .reuters import ReutersIE
-from .reverbnation import ReverbNationIE
-from .revision3 import (
- Revision3EmbedIE,
- Revision3IE,
-)
-from .rice import RICEIE
-from .rmcdecouverte import RMCDecouverteIE
-from .ro220 import Ro220IE
-from .rockstargames import RockstarGamesIE
-from .roosterteeth import RoosterTeethIE
-from .rottentomatoes import RottenTomatoesIE
-from .roxwel import RoxwelIE
-from .rozhlas import RozhlasIE
-from .rtbf import RTBFIE
-from .rte import RteIE, RteRadioIE
-from .rtlnl import RtlNlIE
-from .rtl2 import (
- RTL2IE,
- RTL2YouIE,
- RTL2YouSeriesIE,
-)
-from .rtp import RTPIE
-from .rts import RTSIE
-from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVETelevisionIE
-from .rtvnh import RTVNHIE
-from .rtvs import RTVSIE
-from .ruhd import RUHDIE
-from .rutube import (
- RutubeIE,
- RutubeChannelIE,
- RutubeEmbedIE,
- RutubeMovieIE,
- RutubePersonIE,
- RutubePlaylistIE,
-)
-from .rutv import RUTVIE
-from .ruutu import RuutuIE
-from .ruv import RuvIE
-from .safari import (
- SafariIE,
- SafariApiIE,
- SafariCourseIE,
-)
-from .sapo import SapoIE
-from .savefrom import SaveFromIE
-from .sbs import SBSIE
-from .screencast import ScreencastIE
-from .screencastomatic import ScreencastOMaticIE
-from .scrippsnetworks import ScrippsNetworksWatchIE
-from .seeker import SeekerIE
-from .senateisvp import SenateISVPIE
-from .sendtonews import SendtoNewsIE
-from .servingsys import ServingSysIE
-from .servus import ServusIE
-from .sevenplus import SevenPlusIE
-from .sexu import SexuIE
-from .seznamzpravy import (
- SeznamZpravyIE,
- SeznamZpravyArticleIE,
-)
-from .shahid import (
- ShahidIE,
- ShahidShowIE,
-)
-from .shared import (
- SharedIE,
- VivoIE,
-)
-from .showroomlive import ShowRoomLiveIE
-from .sina import SinaIE
-from .sixplay import SixPlayIE
-from .skylinewebcams import SkylineWebcamsIE
-from .skynewsarabia import (
- SkyNewsArabiaIE,
- SkyNewsArabiaArticleIE,
-)
-from .sky import (
- SkyNewsIE,
- SkySportsIE,
-)
-from .slideshare import SlideshareIE
-from .slideslive import SlidesLiveIE
-from .slutload import SlutloadIE
-from .smotri import (
- SmotriIE,
- SmotriCommunityIE,
- SmotriUserIE,
- SmotriBroadcastIE,
-)
-from .snotr import SnotrIE
-from .sohu import SohuIE
-from .sonyliv import SonyLIVIE
-from .soundcloud import (
- SoundcloudIE,
- SoundcloudSetIE,
- SoundcloudUserIE,
- SoundcloudTrackStationIE,
- SoundcloudPlaylistIE,
- SoundcloudSearchIE,
-)
-from .soundgasm import (
- SoundgasmIE,
- SoundgasmProfileIE
-)
-from .southpark import (
- SouthParkIE,
- SouthParkDeIE,
- SouthParkDkIE,
- SouthParkEsIE,
- SouthParkNlIE
-)
-from .spankbang import (
- SpankBangIE,
- SpankBangPlaylistIE,
-)
-from .spankwire import SpankwireIE
-from .spiegel import SpiegelIE, SpiegelArticleIE
-from .spiegeltv import SpiegeltvIE
-from .spike import (
- BellatorIE,
- ParamountNetworkIE,
-)
-from .stitcher import StitcherIE
-from .sport5 import Sport5IE
-from .sportbox import SportBoxIE
-from .sportdeutschland import SportDeutschlandIE
-from .springboardplatform import SpringboardPlatformIE
-from .sprout import SproutIE
-from .srgssr import (
- SRGSSRIE,
- SRGSSRPlayIE,
-)
-from .srmediathek import SRMediathekIE
-from .stanfordoc import StanfordOpenClassroomIE
-from .steam import SteamIE
-from .streamable import StreamableIE
-from .streamango import StreamangoIE
-from .streamcloud import StreamcloudIE
-from .streamcz import StreamCZIE
-from .streetvoice import StreetVoiceIE
-from .stretchinternet import StretchInternetIE
-from .stv import STVPlayerIE
-from .sunporno import SunPornoIE
-from .sverigesradio import (
- SverigesRadioEpisodeIE,
- SverigesRadioPublicationIE,
-)
-from .svt import (
- SVTIE,
- SVTPageIE,
- SVTPlayIE,
- SVTSeriesIE,
-)
-from .swrmediathek import SWRMediathekIE
-from .syfy import SyfyIE
-from .sztvhu import SztvHuIE
-from .tagesschau import (
- TagesschauPlayerIE,
- TagesschauIE,
-)
-from .tass import TassIE
-from .tastytrade import TastyTradeIE
-from .tbs import TBSIE
-from .tdslifeway import TDSLifewayIE
-from .teachable import (
- TeachableIE,
- TeachableCourseIE,
-)
-from .teachertube import (
- TeacherTubeIE,
- TeacherTubeUserIE,
-)
-from .teachingchannel import TeachingChannelIE
-from .teamcoco import TeamcocoIE
-from .teamtreehouse import TeamTreeHouseIE
-from .techtalks import TechTalksIE
-from .ted import TEDIE
-from .tele5 import Tele5IE
-from .tele13 import Tele13IE
-from .telebruxelles import TeleBruxellesIE
-from .telecinco import TelecincoIE
-from .telegraaf import TelegraafIE
-from .telemb import TeleMBIE
-from .telequebec import (
- TeleQuebecIE,
- TeleQuebecSquatIE,
- TeleQuebecEmissionIE,
- TeleQuebecLiveIE,
-)
-from .teletask import TeleTaskIE
-from .telewebion import TelewebionIE
-from .tennistv import TennisTVIE
-from .testurl import TestURLIE
-from .tf1 import TF1IE
-from .tfo import TFOIE
-from .theintercept import TheInterceptIE
-from .theplatform import (
- ThePlatformIE,
- ThePlatformFeedIE,
-)
-from .thescene import TheSceneIE
-from .thestar import TheStarIE
-from .thesun import TheSunIE
-from .theweatherchannel import TheWeatherChannelIE
-from .thisamericanlife import ThisAmericanLifeIE
-from .thisav import ThisAVIE
-from .thisoldhouse import ThisOldHouseIE
-from .threeqsdn import ThreeQSDNIE
-from .tiktok import (
- TikTokIE,
- TikTokUserIE,
-)
-from .tinypic import TinyPicIE
-from .tmz import (
- TMZIE,
- TMZArticleIE,
-)
-from .tnaflix import (
- TNAFlixNetworkEmbedIE,
- TNAFlixIE,
- EMPFlixIE,
- MovieFapIE,
-)
-from .toggle import ToggleIE
-from .tonline import TOnlineIE
-from .toongoggles import ToonGogglesIE
-from .toutv import TouTvIE
-from .toypics import ToypicsUserIE, ToypicsIE
-from .traileraddict import TrailerAddictIE
-from .trilulilu import TriluliluIE
-from .trunews import TruNewsIE
-from .trutv import TruTVIE
-from .tube8 import Tube8IE
-from .tubitv import TubiTvIE
-from .tumblr import TumblrIE
-from .tunein import (
- TuneInClipIE,
- TuneInStationIE,
- TuneInProgramIE,
- TuneInTopicIE,
- TuneInShortenerIE,
-)
-from .tunepk import TunePkIE
-from .turbo import TurboIE
-from .tutv import TutvIE
-from .tv2 import (
- TV2IE,
- TV2ArticleIE,
-)
-from .tv2hu import TV2HuIE
-from .tv4 import TV4IE
-from .tv5mondeplus import TV5MondePlusIE
-from .tva import TVAIE
-from .tvanouvelles import (
- TVANouvellesIE,
- TVANouvellesArticleIE,
-)
-from .tvc import (
- TVCIE,
- TVCArticleIE,
-)
-from .tvigle import TvigleIE
-from .tvland import TVLandIE
-from .tvn24 import TVN24IE
-from .tvnet import TVNetIE
-from .tvnoe import TVNoeIE
-from .tvnow import (
- TVNowIE,
- TVNowNewIE,
- TVNowSeasonIE,
- TVNowAnnualIE,
- TVNowShowIE,
-)
-from .tvp import (
- TVPEmbedIE,
- TVPIE,
- TVPWebsiteIE,
-)
-from .tvplay import (
- TVPlayIE,
- ViafreeIE,
- TVPlayHomeIE,
-)
-from .tvplayer import TVPlayerIE
-from .tweakers import TweakersIE
-from .twentyfourvideo import TwentyFourVideoIE
-from .twentymin import TwentyMinutenIE
-from .twentythreevideo import TwentyThreeVideoIE
-from .twitcasting import TwitCastingIE
-from .twitch import (
- TwitchVideoIE,
- TwitchChapterIE,
- TwitchVodIE,
- TwitchProfileIE,
- TwitchAllVideosIE,
- TwitchUploadsIE,
- TwitchPastBroadcastsIE,
- TwitchHighlightsIE,
- TwitchStreamIE,
- TwitchClipsIE,
-)
-from .twitter import (
- TwitterCardIE,
- TwitterIE,
- TwitterAmplifyIE,
-)
-from .udemy import (
- UdemyIE,
- UdemyCourseIE
-)
-from .udn import UDNEmbedIE
-from .ufctv import UFCTVIE
-from .uktvplay import UKTVPlayIE
-from .digiteka import DigitekaIE
-from .dlive import (
- DLiveVODIE,
- DLiveStreamIE,
-)
-from .umg import UMGDeIE
-from .unistra import UnistraIE
-from .unity import UnityIE
-from .uol import UOLIE
-from .uplynk import (
- UplynkIE,
- UplynkPreplayIE,
-)
-from .urort import UrortIE
-from .urplay import URPlayIE
-from .usanetwork import USANetworkIE
-from .usatoday import USATodayIE
-from .ustream import UstreamIE, UstreamChannelIE
-from .ustudio import (
- UstudioIE,
- UstudioEmbedIE,
-)
-from .varzesh3 import Varzesh3IE
-from .vbox7 import Vbox7IE
-from .veehd import VeeHDIE
-from .veoh import VeohIE
-from .vesti import VestiIE
-from .vevo import (
- VevoIE,
- VevoPlaylistIE,
-)
-from .vgtv import (
- BTArticleIE,
- BTVestlendingenIE,
- VGTVIE,
-)
-from .vh1 import VH1IE
-from .vice import (
- ViceIE,
- ViceArticleIE,
- ViceShowIE,
-)
-from .vidbit import VidbitIE
-from .viddler import ViddlerIE
-from .videa import VideaIE
-from .videodetective import VideoDetectiveIE
-from .videofyme import VideofyMeIE
-from .videomore import (
- VideomoreIE,
- VideomoreVideoIE,
- VideomoreSeasonIE,
-)
-from .videopremium import VideoPremiumIE
-from .videopress import VideoPressIE
-from .vidio import VidioIE
-from .vidlii import VidLiiIE
-from .vidme import (
- VidmeIE,
- VidmeUserIE,
- VidmeUserLikesIE,
-)
-from .vidzi import VidziIE
-from .vier import VierIE, VierVideosIE
-from .viewlift import (
- ViewLiftIE,
- ViewLiftEmbedIE,
-)
-from .viewster import ViewsterIE
-from .viidea import ViideaIE
-from .vimeo import (
- VimeoIE,
- VimeoAlbumIE,
- VimeoChannelIE,
- VimeoGroupsIE,
- VimeoLikesIE,
- VimeoOndemandIE,
- VimeoReviewIE,
- VimeoUserIE,
- VimeoWatchLaterIE,
- VHXEmbedIE,
-)
-from .vimple import VimpleIE
-from .vine import (
- VineIE,
- VineUserIE,
-)
-from .viki import (
- VikiIE,
- VikiChannelIE,
-)
-from .viqeo import ViqeoIE
-from .viu import (
- ViuIE,
- ViuPlaylistIE,
- ViuOTTIE,
-)
-from .vk import (
- VKIE,
- VKUserVideosIE,
- VKWallPostIE,
-)
-from .vlive import (
- VLiveIE,
- VLiveChannelIE,
- VLivePlaylistIE
-)
-from .vodlocker import VodlockerIE
-from .vodpl import VODPlIE
-from .vodplatform import VODPlatformIE
-from .voicerepublic import VoiceRepublicIE
-from .voot import VootIE
-from .voxmedia import (
- VoxMediaVolumeIE,
- VoxMediaIE,
-)
-from .vrt import VRTIE
-from .vrak import VrakIE
-from .vrv import (
- VRVIE,
- VRVSeriesIE,
-)
-from .vshare import VShareIE
-from .medialaan import MedialaanIE
-from .vube import VubeIE
-from .vuclip import VuClipIE
-from .vvvvid import VVVVIDIE
-from .vyborymos import VyboryMosIE
-from .vzaar import VzaarIE
-from .wakanim import WakanimIE
-from .walla import WallaIE
-from .washingtonpost import (
- WashingtonPostIE,
- WashingtonPostArticleIE,
-)
-from .wat import WatIE
-from .watchbox import WatchBoxIE
-from .watchindianporn import WatchIndianPornIE
-from .wdr import (
- WDRIE,
- WDRPageIE,
- WDRElefantIE,
- WDRMobileIE,
-)
-from .webcaster import (
- WebcasterIE,
- WebcasterFeedIE,
-)
-from .webofstories import (
- WebOfStoriesIE,
- WebOfStoriesPlaylistIE,
-)
-from .weibo import (
- WeiboIE,
- WeiboMobileIE
-)
-from .weiqitv import WeiqiTVIE
-from .wistia import WistiaIE
-from .worldstarhiphop import WorldStarHipHopIE
-from .wsj import (
- WSJIE,
- WSJArticleIE,
-)
-from .wwe import WWEIE
-from .xbef import XBefIE
-from .xboxclips import XboxClipsIE
-from .xfileshare import XFileShareIE
-from .xhamster import (
- XHamsterIE,
- XHamsterEmbedIE,
- XHamsterUserIE,
-)
-from .xiami import (
- XiamiSongIE,
- XiamiAlbumIE,
- XiamiArtistIE,
- XiamiCollectionIE
-)
-from .ximalaya import (
- XimalayaIE,
- XimalayaAlbumIE
-)
-from .xminus import XMinusIE
-from .xnxx import XNXXIE
-from .xstream import XstreamIE
-from .xtube import XTubeUserIE, XTubeIE
-from .xuite import XuiteIE
-from .xvideos import XVideosIE
-from .xxxymovies import XXXYMoviesIE
-from .yahoo import (
- YahooIE,
- YahooSearchIE,
- YahooGyaOPlayerIE,
- YahooGyaOIE,
- YahooJapanNewsIE,
-)
-from .yandexdisk import YandexDiskIE
-from .yandexmusic import (
- YandexMusicTrackIE,
- YandexMusicAlbumIE,
- YandexMusicPlaylistIE,
-)
-from .yandexvideo import YandexVideoIE
-from .yapfiles import YapFilesIE
-from .yesjapan import YesJapanIE
-from .yinyuetai import YinYueTaiIE
-from .ynet import YnetIE
-from .youjizz import YouJizzIE
-from .youku import (
- YoukuIE,
- YoukuShowIE,
-)
-from .younow import (
- YouNowLiveIE,
- YouNowChannelIE,
- YouNowMomentIE,
-)
-from .youporn import YouPornIE
-from .yourporn import YourPornIE
-from .yourupload import YourUploadIE
-from .youtube import (
- YoutubeIE,
- YoutubeChannelIE,
- YoutubeFavouritesIE,
- YoutubeHistoryIE,
- YoutubeLiveIE,
- YoutubePlaylistIE,
- YoutubePlaylistsIE,
- YoutubeRecommendedIE,
- YoutubeSearchDateIE,
- YoutubeSearchIE,
- YoutubeSearchURLIE,
- YoutubeShowIE,
- YoutubeSubscriptionsIE,
- YoutubeTruncatedIDIE,
- YoutubeTruncatedURLIE,
- YoutubeUserIE,
- YoutubeWatchLaterIE,
-)
-from .zapiks import ZapiksIE
-from .zaq1 import Zaq1IE
-from .zattoo import (
- BBVTVIE,
- EinsUndEinsTVIE,
- EWETVIE,
- GlattvisionTVIE,
- MNetTVIE,
- MyVisionTVIE,
- NetPlusIE,
- OsnatelTVIE,
- QuantumTVIE,
- QuicklineIE,
- QuicklineLiveIE,
- SaltTVIE,
- SAKTVIE,
- VTXTVIE,
- WalyTVIE,
- ZattooIE,
- ZattooLiveIE,
-)
-from .zdf import ZDFIE, ZDFChannelIE
-from .zingmp3 import ZingMp3IE
-from .zype import ZypeIE
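
extractors.py was the flat registry the rest of youtube-dl leans on: one import per extractor class, so the package __init__ can derive the full extractor list from module globals. Roughly, and simplified from the non-lazy code path:

from .extractors import *  # noqa: F401,F403

_ALL_CLASSES = [
    klass for name, klass in globals().items()
    if name.endswith('IE') and name != 'GenericIE'
]
_ALL_CLASSES.append(GenericIE)  # the catch-all extractor must be tried last

def gen_extractor_classes():
    # Most specific extractors first, GenericIE as the fallback.
    return _ALL_CLASSES
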
diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py
deleted file mode 100644
index a3dcdca3e..000000000
--- a/youtube_dl/extractor/facebook.py
+++ /dev/null
@@ -1,501 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import socket
-
-from .common import InfoExtractor
-from ..compat import (
- compat_etree_fromstring,
- compat_http_client,
- compat_urllib_error,
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
-)
-from ..utils import (
- clean_html,
- error_to_compat_str,
- ExtractorError,
- get_element_by_id,
- int_or_none,
- js_to_json,
- limit_length,
- parse_count,
- sanitized_Request,
- try_get,
- urlencode_postdata,
-)
-
-
-class FacebookIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?:
- https?://
- (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
- (?:[^#]*?\#!/)?
- (?:
- (?:
- video/video\.php|
- photo\.php|
- video\.php|
- video/embed|
- story\.php
- )\?(?:.*?)(?:v|video_id|story_fbid)=|
- [^/]+/videos/(?:[^/]+/)?|
- [^/]+/posts/|
- groups/[^/]+/permalink/
- )|
- facebook:
- )
- (?P<id>[0-9]+)
- '''
- _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
- _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
- _NETRC_MACHINE = 'facebook'
- IE_NAME = 'facebook'
-
- _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
-
- _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
- _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
-
- _TESTS = [{
- 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
- 'md5': '6a40d33c0eccbb1af76cf0485a052659',
- 'info_dict': {
- 'id': '637842556329505',
- 'ext': 'mp4',
- 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
- 'uploader': 'Tennis on Facebook',
- 'upload_date': '20140908',
- 'timestamp': 1410199200,
- },
- 'skip': 'Requires logging in',
- }, {
- 'url': 'https://www.facebook.com/video.php?v=274175099429670',
- 'info_dict': {
- 'id': '274175099429670',
- 'ext': 'mp4',
- 'title': 're:^Asif Nawab Butt posted a video',
- 'uploader': 'Asif Nawab Butt',
- 'upload_date': '20140506',
- 'timestamp': 1399398998,
- 'thumbnail': r're:^https?://.*',
- },
- 'expected_warnings': [
- 'title'
- ]
- }, {
- 'note': 'Video with DASH manifest',
- 'url': 'https://www.facebook.com/video.php?v=957955867617029',
- 'md5': 'b2c28d528273b323abe5c6ab59f0f030',
- 'info_dict': {
- 'id': '957955867617029',
- 'ext': 'mp4',
- 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
- 'uploader': 'Demy de Zeeuw',
- 'upload_date': '20160110',
- 'timestamp': 1452431627,
- },
- 'skip': 'Requires logging in',
- }, {
- 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
- 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
- 'info_dict': {
- 'id': '544765982287235',
- 'ext': 'mp4',
- 'title': '"What are you doing running in the snow?"',
- 'uploader': 'FailArmy',
- },
- 'skip': 'Video gone',
- }, {
- 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
- 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
- 'info_dict': {
- 'id': '1035862816472149',
- 'ext': 'mp4',
- 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
- 'uploader': 'S. Saint',
- },
- 'skip': 'Video gone',
- }, {
- 'note': 'swf params escaped',
- 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
- 'md5': '97ba073838964d12c70566e0085c2b91',
- 'info_dict': {
- 'id': '10153664894881749',
- 'ext': 'mp4',
- 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
- 'thumbnail': r're:^https?://.*',
- 'timestamp': 1456259628,
- 'upload_date': '20160223',
- 'uploader': 'Barack Obama',
- },
- }, {
- # have 1080P, but only up to 720p in swf params
- 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
- 'md5': '9571fae53d4165bbbadb17a94651dcdc',
- 'info_dict': {
- 'id': '10155529876156509',
- 'ext': 'mp4',
- 'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...',
- 'timestamp': 1477818095,
- 'upload_date': '20161030',
- 'uploader': 'CNN',
- 'thumbnail': r're:^https?://.*',
- 'view_count': int,
- },
- }, {
- # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
- 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
- 'info_dict': {
- 'id': '1417995061575415',
- 'ext': 'mp4',
- 'title': 'md5:1db063d6a8c13faa8da727817339c857',
- 'timestamp': 1486648217,
- 'upload_date': '20170209',
- 'uploader': 'Yaroslav Korpan',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
- 'info_dict': {
- 'id': '1072691702860471',
- 'ext': 'mp4',
- 'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
- 'timestamp': 1477305000,
- 'upload_date': '20161024',
- 'uploader': 'La Guía Del Varón',
- 'thumbnail': r're:^https?://.*',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
- 'info_dict': {
- 'id': '1396382447100162',
- 'ext': 'mp4',
- 'title': 'md5:19a428bbde91364e3de815383b54a235',
- 'timestamp': 1486035494,
- 'upload_date': '20170202',
- 'uploader': 'Elisabeth Ahtn',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
- 'only_matching': True,
- }, {
- 'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
- 'only_matching': True,
- }, {
- 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
- 'only_matching': True,
- }, {
- 'url': 'facebook:544765982287235',
- 'only_matching': True,
- }, {
- 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
- 'only_matching': True,
- }, {
- 'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
- 'only_matching': True,
- }, {
- # no title
- 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
- 'info_dict': {
- 'id': '359649331226507',
- 'ext': 'mp4',
- 'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
- 'uploader': 'ESL One Dota 2',
- },
- 'params': {
- 'skip_download': True,
- },
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- urls = []
- for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
- webpage):
- urls.append(mobj.group('url'))
- # Facebook API embed
- # see https://developers.facebook.com/docs/plugins/embedded-video-player
- for mobj in re.finditer(r'''(?x)<div[^>]+
- class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
- data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook\.com/.+?)(?P=q2)''', webpage):
- urls.append(mobj.group('url'))
- return urls
-
- def _login(self):
- useremail, password = self._get_login_info()
- if useremail is None:
- return
-
- login_page_req = sanitized_Request(self._LOGIN_URL)
- self._set_cookie('facebook.com', 'locale', 'en_US')
- login_page = self._download_webpage(login_page_req, None,
- note='Downloading login page',
- errnote='Unable to download login page')
- lsd = self._search_regex(
- r'<input type="hidden" name="lsd" value="([^"]*)"',
- login_page, 'lsd')
- lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
-
- login_form = {
- 'email': useremail,
- 'pass': password,
- 'lsd': lsd,
- 'lgnrnd': lgnrnd,
- 'next': 'http://facebook.com/home.php',
- 'default_persistent': '0',
- 'legacy_return': '1',
- 'timezone': '-60',
- 'trynum': '1',
- }
- request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
- request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- try:
- login_results = self._download_webpage(request, None,
- note='Logging in', errnote='Unable to log in')
- if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
- error = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
- login_results, 'login error', default=None, group='error')
- if error:
- raise ExtractorError('Unable to login: %s' % error, expected=True)
- self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
- return
-
- fb_dtsg = self._search_regex(
- r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
- h = self._search_regex(
- r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
-
- if not fb_dtsg or not h:
- return
-
- check_form = {
- 'fb_dtsg': fb_dtsg,
- 'h': h,
- 'name_action_selected': 'dont_save',
- }
- check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
- check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
- check_response = self._download_webpage(check_req, None,
- note='Confirming login')
- if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
- self._downloader.report_warning('Unable to confirm login: you have to log in with your browser and authorize the login.')
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
- self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
- return
-
- def _real_initialize(self):
- self._login()
-
- def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
- req = sanitized_Request(url)
- req.add_header('User-Agent', self._CHROME_USER_AGENT)
- webpage = self._download_webpage(req, video_id)
-
- video_data = None
-
- def extract_video_data(instances):
- for item in instances:
- if item[1][0] == 'VideoConfig':
- video_item = item[2][0]
- if video_item.get('video_id'):
- return video_item['videoData']
-
- server_js_data = self._parse_json(self._search_regex(
- r'handleServerJS\(({.+})(?:\);|,")', webpage,
- 'server js data', default='{}'), video_id, fatal=False)
-
- if server_js_data:
- video_data = extract_video_data(server_js_data.get('instances', []))
-
- def extract_from_jsmods_instances(js_data):
- if js_data:
- return extract_video_data(try_get(
- js_data, lambda x: x['jsmods']['instances'], list) or [])
-
- if not video_data:
- server_js_data = self._parse_json(
- self._search_regex(
- r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:stream_pagelet|pagelet_group_mall|permalink_video_pagelet)',
- webpage, 'js data', default='{}'),
- video_id, transform_source=js_to_json, fatal=False)
- video_data = extract_from_jsmods_instances(server_js_data)
-
- if not video_data:
- if not fatal_if_no_video:
- return webpage, False
- m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
- if m_msg is not None:
- raise ExtractorError(
- 'The video is not available, Facebook said: "%s"' % m_msg.group(1),
- expected=True)
- elif '>You must log in to continue' in webpage:
- self.raise_login_required()
-
- # Video info not in first request, do a secondary request using
- # tahoe player specific URL
- tahoe_data = self._download_webpage(
- self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
- data=urlencode_postdata({
- '__a': 1,
- '__pc': self._search_regex(
- r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
- 'pkg cohort', default='PHASED:DEFAULT'),
- '__rev': self._search_regex(
- r'client_revision["\']\s*:\s*(\d+),', webpage,
- 'client revision', default='3944515'),
- 'fb_dtsg': self._search_regex(
- r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
- webpage, 'dtsg token', default=''),
- }),
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
- tahoe_js_data = self._parse_json(
- self._search_regex(
- r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
- 'tahoe js data', default='{}'),
- video_id, fatal=False)
- video_data = extract_from_jsmods_instances(tahoe_js_data)
-
- if not video_data:
- raise ExtractorError('Cannot parse data')
-
- formats = []
- for f in video_data:
- format_id = f['stream_type']
- if f and isinstance(f, dict):
- f = [f]
- if not f or not isinstance(f, list):
- continue
- for quality in ('sd', 'hd'):
- for src_type in ('src', 'src_no_ratelimit'):
- src = f[0].get('%s_%s' % (quality, src_type))
- if src:
- preference = -10 if format_id == 'progressive' else 0
- if quality == 'hd':
- preference += 5
- formats.append({
- 'format_id': '%s_%s_%s' % (format_id, quality, src_type),
- 'url': src,
- 'preference': preference,
- })
- dash_manifest = f[0].get('dash_manifest')
- if dash_manifest:
- formats.extend(self._parse_mpd_formats(
- compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
- if not formats:
- raise ExtractorError('Cannot find video formats')
-
- self._sort_formats(formats)
-
- video_title = self._html_search_regex(
- r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
- 'title', default=None)
- if not video_title:
- video_title = self._html_search_regex(
- r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
- webpage, 'alternative title', default=None)
- if not video_title:
- video_title = self._html_search_meta(
- 'description', webpage, 'title', default=None)
- if video_title:
- video_title = limit_length(video_title, 80)
- else:
- video_title = 'Facebook video #%s' % video_id
- uploader = clean_html(get_element_by_id(
- 'fbPhotoPageAuthorName', webpage)) or self._search_regex(
- r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
- default=None) or self._og_search_title(webpage, fatal=False)
- timestamp = int_or_none(self._search_regex(
- r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
- 'timestamp', default=None))
- thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
-
- view_count = parse_count(self._search_regex(
- r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
- default=None))
-
- info_dict = {
- 'id': video_id,
- 'title': video_title,
- 'formats': formats,
- 'uploader': uploader,
- 'timestamp': timestamp,
- 'thumbnail': thumbnail,
- 'view_count': view_count,
- }
-
- return webpage, info_dict
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
- webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
-
- if info_dict:
- return info_dict
-
- if '/posts/' in url:
- entries = [
- self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
- for vid in self._parse_json(
- self._search_regex(
- r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])',
- webpage, 'video ids', group='ids'),
- video_id)]
-
- return self.playlist_result(entries, video_id)
- else:
- _, info_dict = self._extract_from_url(
- self._VIDEO_PAGE_TEMPLATE % video_id,
- video_id, fatal_if_no_video=True)
- return info_dict
-
-
-class FacebookPluginsVideoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
-
- _TESTS = [{
- 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
- 'md5': '5954e92cdfe51fe5782ae9bda7058a07',
- 'info_dict': {
- 'id': '10154383743583686',
- 'ext': 'mp4',
- 'title': 'What to do during the haze?',
- 'uploader': 'Gov.sg',
- 'upload_date': '20160826',
- 'timestamp': 1472184808,
- },
- 'add_ie': [FacebookIE.ie_key()],
- }, {
- 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
- 'only_matching': True,
- }, {
- 'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- return self.url_result(
- compat_urllib_parse_unquote(self._match_id(url)),
- FacebookIE.ie_key())
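
In the plugins extractor, the real video page URL travels percent-encoded in the href query parameter; the group captured by _VALID_URL is simply unquoted and handed back to FacebookIE. A quick sketch of the round trip, with Python 3 urllib.parse standing in for compat_urllib_parse_unquote and a trimmed hypothetical URL:

import re
from urllib.parse import unquote

_VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
url = ('https://www.facebook.com/plugins/video.php?href='
       'https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F')
print(unquote(re.match(_VALID_URL, url).group('id')))
# -> https://www.facebook.com/gov.sg/videos/10154383743583686/
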
diff --git a/youtube_dl/extractor/flipagram.py b/youtube_dl/extractor/flipagram.py
deleted file mode 100644
index b7be40f1b..000000000
--- a/youtube_dl/extractor/flipagram.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- float_or_none,
- try_get,
- unified_timestamp,
-)
-
-
-class FlipagramIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?flipagram\.com/f/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'https://flipagram.com/f/nyvTSJMKId',
- 'md5': '888dcf08b7ea671381f00fab74692755',
- 'info_dict': {
- 'id': 'nyvTSJMKId',
- 'ext': 'mp4',
- 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
- 'description': 'md5:d55e32edc55261cae96a41fa85ff630e',
- 'duration': 35.571,
- 'timestamp': 1461244995,
- 'upload_date': '20160421',
- 'uploader': 'kitty juria',
- 'uploader_id': 'sjuria101',
- 'creator': 'kitty juria',
- 'view_count': int,
- 'like_count': int,
- 'repost_count': int,
- 'comment_count': int,
- 'comments': list,
- 'formats': 'mincount:2',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- video_data = self._parse_json(
- self._search_regex(
- r'window\.reactH2O\s*=\s*({.+});', webpage, 'video data'),
- video_id)
-
- flipagram = video_data['flipagram']
- video = flipagram['video']
-
- json_ld = self._search_json_ld(webpage, video_id, default={})
- title = json_ld.get('title') or flipagram['captionText']
- description = json_ld.get('description') or flipagram.get('captionText')
-
- formats = [{
- 'url': video['url'],
- 'width': int_or_none(video.get('width')),
- 'height': int_or_none(video.get('height')),
- 'filesize': int_or_none(video_data.get('size')),
- }]
-
- preview_url = try_get(
- flipagram, lambda x: x['music']['track']['previewUrl'], compat_str)
- if preview_url:
- formats.append({
- 'url': preview_url,
- 'ext': 'm4a',
- 'vcodec': 'none',
- })
-
- self._sort_formats(formats)
-
- counts = flipagram.get('counts', {})
- user = flipagram.get('user', {})
- video_data = flipagram.get('video', {})
-
- thumbnails = [{
- 'url': self._proto_relative_url(cover['url']),
- 'width': int_or_none(cover.get('width')),
- 'height': int_or_none(cover.get('height')),
- 'filesize': int_or_none(cover.get('size')),
- } for cover in flipagram.get('covers', []) if cover.get('url')]
-
- # Note that this only retrieves comments that are initially loaded.
-        # For videos with a large number of comments, most won't be retrieved.
- comments = []
- for comment in video_data.get('comments', {}).get(video_id, {}).get('items', []):
- text = comment.get('comment')
- if not text or not isinstance(text, list):
- continue
- comments.append({
- 'author': comment.get('user', {}).get('name'),
- 'author_id': comment.get('user', {}).get('username'),
- 'id': comment.get('id'),
- 'text': text[0],
- 'timestamp': unified_timestamp(comment.get('created')),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': float_or_none(flipagram.get('duration'), 1000),
- 'thumbnails': thumbnails,
- 'timestamp': unified_timestamp(flipagram.get('iso8601Created')),
- 'uploader': user.get('name'),
- 'uploader_id': user.get('username'),
- 'creator': user.get('name'),
- 'view_count': int_or_none(counts.get('plays')),
- 'like_count': int_or_none(counts.get('likes')),
- 'repost_count': int_or_none(counts.get('reflips')),
- 'comment_count': int_or_none(counts.get('comments')),
- 'comments': comments,
- 'formats': formats,
- }
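
Two easy-to-miss normalizations above: duration arrives in milliseconds and is scaled down by float_or_none(..., 1000), and cover URLs may be protocol-relative. A sketch under those assumptions (proto_relative is a hypothetical stand-in for InfoExtractor._proto_relative_url):

from youtube_dl.utils import float_or_none

def proto_relative(url, scheme='http:'):
    # Stand-in for InfoExtractor._proto_relative_url: give scheme-less
    # "//host/path" URLs a scheme so they can be fetched directly.
    return scheme + url if url and url.startswith('//') else url

print(float_or_none('35571', 1000))                   # ms -> 35.571 s
print(proto_relative('//cdn.example.com/cover.jpg'))  # hypothetical cover URL
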
diff --git a/youtube_dl/extractor/fox9.py b/youtube_dl/extractor/fox9.py
deleted file mode 100644
index 17dfffa7b..000000000
--- a/youtube_dl/extractor/fox9.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .anvato import AnvatoIE
-
-
-class FOX9IE(AnvatoIE):
- _VALID_URL = r'https?://(?:www\.)?fox9\.com/(?:[^/]+/)+(?P<id>\d+)-story'
- _TESTS = [{
- 'url': 'http://www.fox9.com/news/215123287-story',
- 'md5': 'd6e1b2572c3bab8a849c9103615dd243',
- 'info_dict': {
- 'id': '314473',
- 'ext': 'mp4',
- 'title': 'Bear climbs tree in downtown Duluth',
- 'description': 'md5:6a36bfb5073a411758a752455408ac90',
- 'duration': 51,
- 'timestamp': 1478123580,
- 'upload_date': '20161102',
- 'uploader': 'EPFOX',
- 'categories': ['News', 'Sports'],
- 'tags': ['news', 'video'],
- },
- }, {
- 'url': 'http://www.fox9.com/news/investigators/214070684-story',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_id = self._parse_json(
- self._search_regex(
- r"this\.videosJson\s*=\s*'(\[.+?\])';",
- webpage, 'anvato playlist'),
- video_id)[0]['video']
-
- return self._get_anvato_videos(
- 'anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b',
- video_id)
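
The FOX9 page inlines its Anvato playlist as a single-quoted JSON string; the regex above captures it, and the first entry's 'video' key becomes the id. A standalone sketch against a hypothetical page fragment:

import json
import re

webpage = 'this.videosJson = \'[{"video": "314473"}]\';'  # hypothetical fragment
playlist = json.loads(re.search(
    r"this\.videosJson\s*=\s*'(\[.+?\])';", webpage).group(1))
print(playlist[0]['video'])  # -> 314473
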
diff --git a/youtube_dl/extractor/franceculture.py b/youtube_dl/extractor/franceculture.py
deleted file mode 100644
index b8fa17588..000000000
--- a/youtube_dl/extractor/franceculture.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- extract_attributes,
- int_or_none,
-)
-
-
-class FranceCultureIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
- 'info_dict': {
- 'id': 'rendez-vous-au-pays-des-geeks',
- 'display_id': 'rendez-vous-au-pays-des-geeks',
- 'ext': 'mp3',
- 'title': 'Rendez-vous au pays des geeks',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20140301',
- 'timestamp': 1393642916,
- 'vcodec': 'none',
- }
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- video_data = extract_attributes(self._search_regex(
- r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)',
- webpage, 'video data'))
-
- video_url = video_data['data-asset-source']
- title = video_data.get('data-asset-title') or self._og_search_title(webpage)
-
- description = self._html_search_regex(
- r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
- webpage, 'description', default=None)
- thumbnail = self._search_regex(
- r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
- webpage, 'thumbnail', fatal=False)
- uploader = self._html_search_regex(
- r'(?s)<span class="author">(.*?)</span>',
- webpage, 'uploader', default=None)
- ext = determine_ext(video_url.lower())
-
- return {
- 'id': display_id,
- 'display_id': display_id,
- 'url': video_url,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'ext': ext,
- 'vcodec': 'none' if ext == 'mp3' else None,
- 'uploader': uploader,
- 'timestamp': int_or_none(video_data.get('data-asset-created-date')),
- 'duration': int_or_none(video_data.get('data-duration')),
- }
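
Nearly all metadata here rides on data-* attributes of one button tag; extract_attributes from youtube_dl.utils turns a matched start tag into a dict, and determine_ext decides whether vcodec should be 'none'. A sketch with hypothetical markup:

from youtube_dl.utils import determine_ext, extract_attributes

button = ('<button data-asset-source="https://media.example.org/emission.mp3"'
          ' data-asset-title="Rendez-vous au pays des geeks">')  # hypothetical tag
attrs = extract_attributes(button)
video_url = attrs['data-asset-source']
ext = determine_ext(video_url.lower())
print(video_url, ext, 'none' if ext == 'mp3' else None)
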
diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py
deleted file mode 100644
index 81b468c7d..000000000
--- a/youtube_dl/extractor/francetv.py
+++ /dev/null
@@ -1,516 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- clean_html,
- determine_ext,
- ExtractorError,
- int_or_none,
- parse_duration,
- try_get,
- url_or_none,
-)
-from .dailymotion import DailymotionIE
-
-
-class FranceTVBaseInfoExtractor(InfoExtractor):
- def _make_url_result(self, video_or_full_id, catalog=None):
- full_id = 'francetv:%s' % video_or_full_id
- if '@' not in video_or_full_id and catalog:
- full_id += '@%s' % catalog
- return self.url_result(
- full_id, ie=FranceTVIE.ie_key(),
- video_id=video_or_full_id.split('@')[0])
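
Every site-specific extractor below funnels into an internal francetv: URL; the helper appends @catalog only when the id does not already embed one. A sketch of just that composition rule:

def make_full_id(video_or_full_id, catalog=None):
    # Mirrors _make_url_result: only append the catalog when the id
    # does not already carry one after an '@'.
    full_id = 'francetv:%s' % video_or_full_id
    if '@' not in video_or_full_id and catalog:
        full_id += '@%s' % catalog
    return full_id

print(make_full_id('162311093'))                   # francetv:162311093
print(make_full_id('NI_657393', 'Regions'))        # francetv:NI_657393@Regions
print(make_full_id('NI_1004933@Zouzous', 'Info'))  # francetv:NI_1004933@Zouzous
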
-
-
-class FranceTVIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?:
- https?://
- sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
- .*?\bidDiffusion=[^&]+|
- (?:
- https?://videos\.francetv\.fr/video/|
- francetv:
- )
- (?P<id>[^@]+)(?:@(?P<catalog>.+))?
- )
- '''
-
- _TESTS = [{
- # without catalog
- 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
- 'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
- 'info_dict': {
- 'id': '162311093',
- 'ext': 'mp4',
- 'title': '13h15, le dimanche... - Les mystères de Jésus',
- 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
- 'timestamp': 1502623500,
- 'upload_date': '20170813',
- },
- }, {
- # with catalog
- 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
- 'only_matching': True,
- }, {
- 'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
- 'only_matching': True,
- }, {
- 'url': 'francetv:162311093',
- 'only_matching': True,
- }, {
- 'url': 'francetv:NI_1004933@Zouzous',
- 'only_matching': True,
- }, {
- 'url': 'francetv:NI_983319@Info-web',
- 'only_matching': True,
- }, {
- 'url': 'francetv:NI_983319',
- 'only_matching': True,
- }, {
- 'url': 'francetv:NI_657393@Regions',
- 'only_matching': True,
- }, {
- # france-3 live
- 'url': 'francetv:SIM_France3',
- 'only_matching': True,
- }]
-
- def _extract_video(self, video_id, catalogue=None):
-        # Videos are identified by idDiffusion, so the catalogue part is
-        # optional. However, when provided, some extra formats may be
-        # returned, so we pass it along if available.
- info = self._download_json(
- 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
- video_id, 'Downloading video JSON', query={
- 'idDiffusion': video_id,
- 'catalogue': catalogue or '',
- })
-
- if info.get('status') == 'NOK':
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, info['message']),
- expected=True)
- allowed_countries = info['videos'][0].get('geoblocage')
- if allowed_countries:
- georestricted = True
- geo_info = self._download_json(
- 'http://geo.francetv.fr/ws/edgescape.json', video_id,
- 'Downloading geo restriction info')
- country = geo_info['reponse']['geo_info']['country_code']
- if country not in allowed_countries:
- raise ExtractorError(
- 'The video is not available from your location',
- expected=True)
- else:
- georestricted = False
-
- def sign(manifest_url, manifest_id):
- for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
- signed_url = url_or_none(self._download_webpage(
- 'https://%s/esi/TA' % host, video_id,
- 'Downloading signed %s manifest URL' % manifest_id,
- fatal=False, query={
- 'url': manifest_url,
- }))
- if signed_url:
- return signed_url
- return manifest_url
-
- is_live = None
-
- formats = []
- for video in info['videos']:
- if video['statut'] != 'ONLINE':
- continue
- video_url = video['url']
- if not video_url:
- continue
- if is_live is None:
- is_live = (try_get(
- video, lambda x: x['plages_ouverture'][0]['direct'],
- bool) is True) or '/live.francetv.fr/' in video_url
- format_id = video['format']
- ext = determine_ext(video_url)
- if ext == 'f4m':
- if georestricted:
- # See https://github.com/ytdl-org/youtube-dl/issues/3963
- # m3u8 urls work fine
- continue
- formats.extend(self._extract_f4m_formats(
- sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
- video_id, f4m_id=format_id, fatal=False))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- sign(video_url, format_id), video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id=format_id,
- fatal=False))
- elif video_url.startswith('rtmp'):
- formats.append({
- 'url': video_url,
- 'format_id': 'rtmp-%s' % format_id,
- 'ext': 'flv',
- })
- else:
- if self._is_valid_url(video_url, video_id, format_id):
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- })
- self._sort_formats(formats)
-
- title = info['titre']
- subtitle = info.get('sous_titre')
- if subtitle:
- title += ' - %s' % subtitle
- title = title.strip()
-
- subtitles = {}
- subtitles_list = [{
- 'url': subformat['url'],
- 'ext': subformat.get('format'),
- } for subformat in info.get('subtitles', []) if subformat.get('url')]
- if subtitles_list:
- subtitles['fr'] = subtitles_list
-
- return {
- 'id': video_id,
- 'title': self._live_title(title) if is_live else title,
- 'description': clean_html(info['synopsis']),
- 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
- 'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
- 'timestamp': int_or_none(info['diffusion']['timestamp']),
- 'is_live': is_live,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- catalog = mobj.group('catalog')
-
- if not video_id:
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- video_id = qs.get('idDiffusion', [None])[0]
- catalog = qs.get('catalogue', [None])[0]
- if not video_id:
- raise ExtractorError('Invalid URL', expected=True)
-
- return self._extract_video(video_id, catalog)
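
For raw getInfosOeuvre URLs the named groups stay empty, so the id and catalogue fall back to query-string parsing. A sketch with Python 3's urllib.parse, which is what compat_urlparse resolves to there:

from urllib.parse import parse_qs, urlparse

url = ('https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/'
       '?idDiffusion=162311093&catalogue=Zouzous')
qs = parse_qs(urlparse(url).query)
print(qs.get('idDiffusion', [None])[0])  # -> 162311093
print(qs.get('catalogue', [None])[0])    # -> Zouzous
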
-
-
-class FranceTVSiteIE(FranceTVBaseInfoExtractor):
- _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
-
- _TESTS = [{
- 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
- 'info_dict': {
- 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
- 'ext': 'mp4',
- 'title': '13h15, le dimanche... - Les mystères de Jésus',
- 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
- 'timestamp': 1502623500,
- 'upload_date': '20170813',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [FranceTVIE.ie_key()],
- }, {
- # france3
- 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
- 'only_matching': True,
- }, {
- # france4
- 'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html',
- 'only_matching': True,
- }, {
- # france5
- 'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html',
- 'only_matching': True,
- }, {
- # franceo
- 'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
- 'only_matching': True,
- }, {
- # france2 live
- 'url': 'https://www.france.tv/france-2/direct.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
- 'only_matching': True,
- }, {
- 'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.france.tv/142749-rouge-sang.html',
- 'only_matching': True,
- }, {
- # france-3 live
- 'url': 'https://www.france.tv/france-3/direct.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- catalogue = None
- video_id = self._search_regex(
- r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
- webpage, 'video id', default=None, group='id')
-
- if not video_id:
- video_id, catalogue = self._html_search_regex(
- r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
- webpage, 'video ID').split('@')
-
- return self._make_url_result(video_id, catalogue)
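
The video-id pattern above captures the opening quote and uses the tempered dot (?:(?!\1).)+ to consume characters up to the matching quote, so single- and double-quoted values both work. A sketch against a hypothetical page snippet:

import re

webpage = 'player.load({ videoId: "ec217ecc-0733-48cf-ac06-af1347b849d1" })'
m = re.search(
    r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
    webpage)
print(m.group('id'))  # -> ec217ecc-0733-48cf-ac06-af1347b849d1
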
-
-
-class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
- _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
-
- _TESTS = [{
- 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
- 'info_dict': {
- 'id': 'NI_983319',
- 'ext': 'mp4',
- 'title': 'Le Pen Reims',
- 'upload_date': '20170505',
- 'timestamp': 1493981780,
- 'duration': 16,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [FranceTVIE.ie_key()],
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video = self._download_json(
- 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
- video_id)
-
- return self._make_url_result(video['video_id'], video.get('catalog'))
-
-
-class FranceTVInfoIE(FranceTVBaseInfoExtractor):
- IE_NAME = 'francetvinfo.fr'
- _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
-
- _TESTS = [{
- 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
- 'info_dict': {
- 'id': '84981923',
- 'ext': 'mp4',
- 'title': 'Soir 3',
- 'upload_date': '20130826',
- 'timestamp': 1377548400,
- 'subtitles': {
- 'fr': 'mincount:2',
- },
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [FranceTVIE.ie_key()],
- }, {
- 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
- 'only_matching': True,
- }, {
- 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
- 'only_matching': True,
- }, {
- 'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
- 'only_matching': True,
- }, {
- # Dailymotion embed
- 'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
- 'md5': 'ee7f1828f25a648addc90cb2687b1f12',
- 'info_dict': {
- 'id': 'x4iiko0',
- 'ext': 'mp4',
- 'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
- 'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
- 'timestamp': 1467011958,
- 'upload_date': '20160627',
- 'uploader': 'France Inter',
- 'uploader_id': 'x2q2ez',
- },
- 'add_ie': ['Dailymotion'],
- }, {
- 'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- dailymotion_urls = DailymotionIE._extract_urls(webpage)
- if dailymotion_urls:
- return self.playlist_result([
- self.url_result(dailymotion_url, DailymotionIE.ie_key())
- for dailymotion_url in dailymotion_urls])
-
- video_id = self._search_regex(
- (r'player\.load[^;]+src:\s*["\']([^"\']+)',
- r'id-video=([^@]+@[^"]+)',
- r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"'),
- webpage, 'video id')
-
- return self._make_url_result(video_id)
-
-
-class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
- IE_NAME = 'sport.francetvinfo.fr'
- _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
- 'info_dict': {
- 'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
- 'ext': 'mp4',
- 'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
- 'timestamp': 1523639962,
- 'upload_date': '20180413',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [FranceTVIE.ie_key()],
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
- return self._make_url_result(video_id, 'Sport-web')
-
-
-class GenerationWhatIE(InfoExtractor):
- IE_NAME = 'france2.fr:generation-what'
- _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
- 'info_dict': {
- 'id': 'wtvKYUG45iw',
- 'ext': 'mp4',
- 'title': 'Generation What - Garde à vous - FRA',
- 'uploader': 'Generation What',
- 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
- 'upload_date': '20160411',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Youtube'],
- }, {
- 'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- youtube_id = self._search_regex(
- r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
- webpage, 'youtube id')
-
- return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
-
-
-class CultureboxIE(FranceTVBaseInfoExtractor):
- _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
- 'info_dict': {
- 'id': 'EV_134885',
- 'ext': 'mp4',
- 'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
- 'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
- 'upload_date': '20180206',
- 'timestamp': 1517945220,
- 'duration': 5981,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [FranceTVIE.ie_key()],
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- if ">Ce live n'est plus disponible en replay<" in webpage:
- raise ExtractorError(
- 'Video %s is not available' % display_id, expected=True)
-
- video_id, catalogue = self._search_regex(
- r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
- webpage, 'video id').split('@')
-
- return self._make_url_result(video_id, catalogue)
-
-
-class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
- _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'
-
- _TESTS = [{
- 'url': 'https://www.zouzous.fr/heros/simon',
- 'info_dict': {
- 'id': 'simon',
- },
- 'playlist_count': 9,
- }, {
- 'url': 'https://www.ludo.fr/heros/ninjago',
- 'info_dict': {
- 'id': 'ninjago',
- },
- 'playlist_count': 10,
- }, {
- 'url': 'https://www.zouzous.fr/heros/simon?abc',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
-
- playlist = self._download_json(
- '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id)
-
- if not playlist.get('count'):
- raise ExtractorError(
- '%s is not available' % playlist_id, expected=True)
-
- entries = []
- for item in playlist['items']:
- identity = item.get('identity')
- if identity and isinstance(identity, compat_str):
- entries.append(self._make_url_result(identity))
-
- return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/extractor/gameone.py b/youtube_dl/extractor/gameone.py
deleted file mode 100644
index a07d69841..000000000
--- a/youtube_dl/extractor/gameone.py
+++ /dev/null
@@ -1,134 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- xpath_with_ns,
- parse_iso8601,
- float_or_none,
- int_or_none,
-)
-
-NAMESPACE_MAP = {
- 'media': 'http://search.yahoo.com/mrss/',
-}
-
-# URL prefix to download the mp4 files directly instead of streaming via rtmp
-# Credits go to XBox-Maniac
-# http://board.jdownloader.org/showpost.php?p=185835&postcount=31
-RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
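
The prefix is applied in the format loop below, which rewrites each rendition's rtmp path onto the CDN from the r2 segment onward. A sketch with a hypothetical rendition URL:

import re

RAW_MP4_URL = 'http://cdn.riptide-mtvn.com/'
src = 'rtmp://example.mtvnn.com/ondemand/r2/gameone/episode_288_640x360.mp4'  # hypothetical
print(re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', src))
# -> http://cdn.riptide-mtvn.com/r2/gameone/episode_288_640x360.mp4
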
-
-
-class GameOneIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gameone\.de/tv/(?P<id>\d+)'
- _TESTS = [
- {
- 'url': 'http://www.gameone.de/tv/288',
- 'md5': '136656b7fb4c9cb4a8e2d500651c499b',
- 'info_dict': {
- 'id': '288',
- 'ext': 'mp4',
- 'title': 'Game One - Folge 288',
- 'duration': 1238,
- 'thumbnail': 'http://s3.gameone.de/gameone/assets/video_metas/teaser_images/000/643/636/big/640x360.jpg',
- 'description': 'FIFA-Pressepokal 2014, Star Citizen, Kingdom Come: Deliverance, Project Cars, Schöner Trants Nerdquiz Folge 2 Runde 1',
- 'age_limit': 16,
- 'upload_date': '20140513',
- 'timestamp': 1399980122,
- }
- },
- {
- 'url': 'http://gameone.de/tv/220',
- 'md5': '5227ca74c4ae6b5f74c0510a7c48839e',
- 'info_dict': {
- 'id': '220',
- 'ext': 'mp4',
- 'upload_date': '20120918',
- 'description': 'Jet Set Radio HD, Tekken Tag Tournament 2, Source Filmmaker',
- 'timestamp': 1347971451,
- 'title': 'Game One - Folge 220',
- 'duration': 896.62,
- 'age_limit': 16,
- }
-        },
-    ]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- og_video = self._og_search_video_url(webpage, secure=False)
- description = self._html_search_meta('description', webpage)
-        age_limit = int(self._search_regex(
-            r'age=(\d+)',
-            self._html_search_meta('age-de-meta-label', webpage),
-            'age_limit', '0'))
- mrss_url = self._search_regex(r'mrss=([^&]+)', og_video, 'mrss')
-
- mrss = self._download_xml(mrss_url, video_id, 'Downloading mrss')
- title = mrss.find('.//item/title').text
- thumbnail = mrss.find('.//item/image').get('url')
- timestamp = parse_iso8601(mrss.find('.//pubDate').text, delimiter=' ')
- content = mrss.find(xpath_with_ns('.//media:content', NAMESPACE_MAP))
- content_url = content.get('url')
-
- content = self._download_xml(
- content_url,
- video_id,
- 'Downloading media:content')
- rendition_items = content.findall('.//rendition')
- duration = float_or_none(rendition_items[0].get('duration'))
- formats = [
- {
- 'url': re.sub(r'.*/(r2)', RAW_MP4_URL + r'\1', r.find('./src').text),
- 'width': int_or_none(r.get('width')),
- 'height': int_or_none(r.get('height')),
- 'tbr': int_or_none(r.get('bitrate')),
- }
- for r in rendition_items
- ]
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- 'description': description,
- 'age_limit': age_limit,
- 'timestamp': timestamp,
- }
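
Locating media:content above depends on expanding the MRSS namespace prefix, which xpath_with_ns does using NAMESPACE_MAP. A one-line sketch:

from youtube_dl.utils import xpath_with_ns

NAMESPACE_MAP = {'media': 'http://search.yahoo.com/mrss/'}
print(xpath_with_ns('.//media:content', NAMESPACE_MAP))
# -> .//{http://search.yahoo.com/mrss/}content
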
-
-
-class GameOnePlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?gameone\.de(?:/tv)?/?$'
- IE_NAME = 'gameone:playlist'
- _TEST = {
- 'url': 'http://www.gameone.de/tv',
- 'info_dict': {
- 'title': 'GameOne',
- },
- 'playlist_mincount': 294,
- }
-
- def _real_extract(self, url):
- webpage = self._download_webpage('http://www.gameone.de/tv', 'TV')
- max_id = max(map(int, re.findall(r'<a href="/tv/(\d+)"', webpage)))
- entries = [
-            self.url_result('http://www.gameone.de/tv/%d' % video_id, 'GameOne')
- for video_id in range(max_id, 0, -1)]
-
- return {
- '_type': 'playlist',
- 'title': 'GameOne',
- 'entries': entries,
- }
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
deleted file mode 100644
index ec43c5ae4..000000000
--- a/youtube_dl/extractor/generic.py
+++ /dev/null
@@ -1,3426 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import os
-import re
-import sys
-
-from .common import InfoExtractor
-from .youtube import YoutubeIE
-from ..compat import (
- compat_etree_fromstring,
- compat_str,
- compat_urllib_parse_unquote,
- compat_urlparse,
- compat_xml_parse_error,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
- float_or_none,
- HEADRequest,
- is_html,
- js_to_json,
- KNOWN_EXTENSIONS,
- merge_dicts,
- mimetype2ext,
- orderedSet,
- sanitized_Request,
- smuggle_url,
- unescapeHTML,
- unified_strdate,
- unsmuggle_url,
- UnsupportedError,
- xpath_text,
-)
-from .commonprotocols import RtmpIE
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .nexx import (
- NexxIE,
- NexxEmbedIE,
-)
-from .nbc import NBCSportsVPlayerIE
-from .ooyala import OoyalaIE
-from .rutv import RUTVIE
-from .tvc import TVCIE
-from .sportbox import SportBoxIE
-from .smotri import SmotriIE
-from .myvi import MyviIE
-from .condenast import CondeNastIE
-from .udn import UDNEmbedIE
-from .senateisvp import SenateISVPIE
-from .svt import SVTIE
-from .pornhub import PornHubIE
-from .xhamster import XHamsterEmbedIE
-from .tnaflix import TNAFlixNetworkEmbedIE
-from .drtuber import DrTuberIE
-from .redtube import RedTubeIE
-from .tube8 import Tube8IE
-from .vimeo import VimeoIE
-from .dailymotion import DailymotionIE
-from .dailymail import DailyMailIE
-from .onionstudios import OnionStudiosIE
-from .viewlift import ViewLiftEmbedIE
-from .mtv import MTVServicesEmbeddedIE
-from .pladform import PladformIE
-from .videomore import VideomoreIE
-from .webcaster import WebcasterFeedIE
-from .googledrive import GoogleDriveIE
-from .jwplatform import JWPlatformIE
-from .digiteka import DigitekaIE
-from .arkena import ArkenaIE
-from .instagram import InstagramIE
-from .liveleak import LiveLeakIE
-from .threeqsdn import ThreeQSDNIE
-from .theplatform import ThePlatformIE
-from .kaltura import KalturaIE
-from .eagleplatform import EaglePlatformIE
-from .facebook import FacebookIE
-from .soundcloud import SoundcloudIE
-from .tunein import TuneInBaseIE
-from .vbox7 import Vbox7IE
-from .dbtv import DBTVIE
-from .piksel import PikselIE
-from .videa import VideaIE
-from .twentymin import TwentyMinutenIE
-from .ustream import UstreamIE
-from .openload import (
- OpenloadIE,
- VerystreamIE,
-)
-from .videopress import VideoPressIE
-from .rutube import RutubeIE
-from .limelight import LimelightBaseIE
-from .anvato import AnvatoIE
-from .washingtonpost import WashingtonPostIE
-from .wistia import WistiaIE
-from .mediaset import MediasetIE
-from .joj import JojIE
-from .megaphone import MegaphoneIE
-from .vzaar import VzaarIE
-from .channel9 import Channel9IE
-from .vshare import VShareIE
-from .mediasite import MediasiteIE
-from .springboardplatform import SpringboardPlatformIE
-from .yapfiles import YapFilesIE
-from .vice import ViceIE
-from .xfileshare import XFileShareIE
-from .cloudflarestream import CloudflareStreamIE
-from .peertube import PeerTubeIE
-from .teachable import TeachableIE
-from .indavideo import IndavideoEmbedIE
-from .apa import APAIE
-from .foxnews import FoxNewsIE
-from .viqeo import ViqeoIE
-from .expressen import ExpressenIE
-from .zype import ZypeIE
-
-
-class GenericIE(InfoExtractor):
- IE_DESC = 'Generic downloader that works on some sites'
- _VALID_URL = r'.*'
- IE_NAME = 'generic'
- _TESTS = [
- # Direct link to a video
- {
- 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
- 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
- 'info_dict': {
- 'id': 'trailer',
- 'ext': 'mp4',
- 'title': 'trailer',
- 'upload_date': '20100513',
- }
- },
- # Direct link to media delivered compressed (until Accept-Encoding is *)
- {
- 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
- 'md5': '128c42e68b13950268b648275386fc74',
- 'info_dict': {
- 'id': 'FictionJunction-Parallel_Hearts',
- 'ext': 'flac',
- 'title': 'FictionJunction-Parallel_Hearts',
- 'upload_date': '20140522',
- },
- 'expected_warnings': [
- 'URL could be a direct video link, returning it as such.'
- ],
- 'skip': 'URL invalid',
- },
- # Direct download with broken HEAD
- {
- 'url': 'http://ai-radio.org:8000/radio.opus',
- 'info_dict': {
- 'id': 'radio',
- 'ext': 'opus',
- 'title': 'radio',
- },
- 'params': {
- 'skip_download': True, # infinite live stream
- },
- 'expected_warnings': [
- r'501.*Not Implemented',
- r'400.*Bad Request',
- ],
- },
- # Direct link with incorrect MIME type
- {
- 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
- 'md5': '4ccbebe5f36706d85221f204d7eb5913',
- 'info_dict': {
- 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
- 'id': '5_Lennart_Poettering_-_Systemd',
- 'ext': 'webm',
- 'title': '5_Lennart_Poettering_-_Systemd',
- 'upload_date': '20141120',
- },
- 'expected_warnings': [
- 'URL could be a direct video link, returning it as such.'
- ]
- },
- # RSS feed
- {
- 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
- 'info_dict': {
- 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
- 'title': 'Zero Punctuation',
- 'description': 're:.*groundbreaking video review series.*'
- },
- 'playlist_mincount': 11,
- },
- # RSS feed with enclosure
- {
- 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
- 'info_dict': {
- 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
- 'ext': 'm4v',
- 'upload_date': '20150228',
- 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
- }
- },
- # RSS feed with enclosures and unsupported link URLs
- {
- 'url': 'http://www.hellointernet.fm/podcast?format=rss',
- 'info_dict': {
- 'id': 'http://www.hellointernet.fm/podcast?format=rss',
- 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
- 'title': 'Hello Internet',
- },
- 'playlist_mincount': 100,
- },
- # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
- {
- 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
- 'info_dict': {
- 'id': 'smil',
- 'ext': 'mp4',
- 'title': 'Automatics, robotics and biocybernetics',
- 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
- 'upload_date': '20130627',
- 'formats': 'mincount:16',
- 'subtitles': 'mincount:1',
- },
- 'params': {
- 'force_generic_extractor': True,
- 'skip_download': True,
- },
- },
- # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
- {
- 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
- 'info_dict': {
- 'id': 'hds',
- 'ext': 'flv',
- 'title': 'hds',
- 'formats': 'mincount:1',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # SMIL from https://www.restudy.dk/video/play/id/1637
- {
- 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
- 'info_dict': {
- 'id': 'video_1637',
- 'ext': 'flv',
- 'title': 'video_1637',
- 'formats': 'mincount:3',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
- {
- 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
- 'info_dict': {
- 'id': 'smil-service',
- 'ext': 'flv',
- 'title': 'smil-service',
- 'formats': 'mincount:1',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
- {
- 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
- 'info_dict': {
- 'id': '4719370',
- 'ext': 'mp4',
- 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
- 'formats': 'mincount:3',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
- {
- 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
- 'info_dict': {
- 'id': 'mZlp2ctYIUEB',
- 'ext': 'mp4',
- 'title': 'Tikibad ontruimd wegens brand',
- 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 33,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # MPD from http://dash-mse-test.appspot.com/media.html
- {
- 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
- 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
- 'info_dict': {
- 'id': 'car-20120827-manifest',
- 'ext': 'mp4',
- 'title': 'car-20120827-manifest',
- 'formats': 'mincount:9',
- 'upload_date': '20130904',
- },
- 'params': {
- 'format': 'bestvideo',
- },
- },
- # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
- {
- 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
- 'info_dict': {
- 'id': 'content',
- 'ext': 'mp4',
- 'title': 'content',
- 'formats': 'mincount:8',
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- 'skip': 'video gone',
- },
- # m3u8 served with Content-Type: text/plain
- {
- 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
- 'info_dict': {
- 'id': 'index',
- 'ext': 'mp4',
- 'title': 'index',
- 'upload_date': '20140720',
- 'formats': 'mincount:11',
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- 'skip': 'video gone',
- },
- # google redirect
- {
- 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
- 'info_dict': {
- 'id': 'cmQHVoWB5FY',
- 'ext': 'mp4',
- 'upload_date': '20130224',
- 'uploader_id': 'TheVerge',
- 'description': r're:^Chris Ziegler takes a look at the\.*',
- 'uploader': 'The Verge',
- 'title': 'First Firefox OS phones side-by-side',
- },
- 'params': {
- 'skip_download': False,
- }
- },
- {
- # redirect in Refresh HTTP header
- 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
- 'info_dict': {
- 'id': 'pO8h3EaFRdo',
- 'ext': 'mp4',
- 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
- 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
- 'upload_date': '20150917',
- 'uploader_id': 'brtvofficial',
- 'uploader': 'Boiler Room',
- },
- 'params': {
- 'skip_download': False,
- },
- },
- {
- 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
- 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
- 'info_dict': {
- 'id': '13601338388002',
- 'ext': 'mp4',
- 'uploader': 'www.hodiho.fr',
- 'title': 'R\u00e9gis plante sa Jeep',
- }
- },
- # bandcamp page with custom domain
- {
- 'add_ie': ['Bandcamp'],
- 'url': 'http://bronyrock.com/track/the-pony-mash',
- 'info_dict': {
- 'id': '3235767654',
- 'ext': 'mp3',
- 'title': 'The Pony Mash',
- 'uploader': 'M_Pallante',
- },
- 'skip': 'There is a limit of 200 free downloads / month for the test song',
- },
- {
- # embedded brightcove video
- # it also tests brightcove videos that need to set the 'Referer'
- # in the http requests
- 'add_ie': ['BrightcoveLegacy'],
- 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
- 'info_dict': {
- 'id': '2765128793001',
- 'ext': 'mp4',
- 'title': 'Le cours de bourse : l’analyse technique',
- 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
- 'uploader': 'BFM BUSINESS',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # embedded with itemprop embedURL and video id spelled as `idVideo`
-            'add_ie': ['BrightcoveLegacy'],
- 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
- 'info_dict': {
- 'id': '5255628253001',
- 'ext': 'mp4',
- 'title': 'md5:37c519b1128915607601e75a87995fc0',
- 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
- 'uploader': 'BFM BUSINESS',
- 'uploader_id': '876450612001',
- 'timestamp': 1482255315,
- 'upload_date': '20161220',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # https://github.com/ytdl-org/youtube-dl/issues/2253
- 'url': 'http://bcove.me/i6nfkrc3',
- 'md5': '0ba9446db037002366bab3b3eb30c88c',
- 'info_dict': {
- 'id': '3101154703001',
- 'ext': 'mp4',
- 'title': 'Still no power',
- 'uploader': 'thestar.com',
- 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
- },
- 'add_ie': ['BrightcoveLegacy'],
- 'skip': 'video gone',
- },
- {
- 'url': 'http://www.championat.com/video/football/v/87/87499.html',
- 'md5': 'fb973ecf6e4a78a67453647444222983',
- 'info_dict': {
- 'id': '3414141473001',
- 'ext': 'mp4',
- 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
- 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
- 'uploader': 'Championat',
- },
- },
- {
- # https://github.com/ytdl-org/youtube-dl/issues/3541
- 'add_ie': ['BrightcoveLegacy'],
- 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
- 'info_dict': {
- 'id': '3866516442001',
- 'ext': 'mp4',
- 'title': 'Leer mij vrouwen kennen: Aflevering 1',
- 'description': 'Leer mij vrouwen kennen: Aflevering 1',
- 'uploader': 'SBS Broadcasting',
- },
- 'skip': 'Restricted to Netherlands',
- 'params': {
- 'skip_download': True, # m3u8 download
- },
- },
- {
- # Brightcove video in <iframe>
- 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
- 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
- 'info_dict': {
- 'id': '5360463607001',
- 'ext': 'mp4',
- 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
- 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
- 'uploader': 'United Nations',
- 'uploader_id': '1362235914001',
- 'timestamp': 1489593889,
- 'upload_date': '20170315',
- },
- 'add_ie': ['BrightcoveLegacy'],
- },
- {
- # Brightcove with alternative playerID key
- 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
- 'info_dict': {
- 'id': 'nmeth.2062_SV1',
- 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '2228375078001',
- 'ext': 'mp4',
- 'title': 'nmeth.2062-sv1',
- 'description': 'nmeth.2062-sv1',
- 'timestamp': 1363357591,
- 'upload_date': '20130315',
- 'uploader': 'Nature Publishing Group',
- 'uploader_id': '1964492299001',
- },
- }],
- },
- {
- # Brightcove with UUID in videoPlayer
- 'url': 'http://www8.hp.com/cn/zh/home.html',
- 'info_dict': {
- 'id': '5255815316001',
- 'ext': 'mp4',
- 'title': 'Sprocket Video - China',
- 'description': 'Sprocket Video - China',
- 'uploader': 'HP-Video Gallery',
- 'timestamp': 1482263210,
- 'upload_date': '20161220',
- 'uploader_id': '1107601872001',
- },
- 'params': {
- 'skip_download': True, # m3u8 download
- },
- 'skip': 'video rotates...weekly?',
- },
- {
- # Brightcove:new type [2].
- 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
- 'md5': '2b35148fcf48da41c9fb4591650784f3',
- 'info_dict': {
- 'id': '5348741021001',
- 'ext': 'mp4',
- 'upload_date': '20170306',
- 'uploader_id': '4191638492001',
- 'timestamp': 1488769918,
-                'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
-            },
- },
- {
- # Alternative brightcove <video> attributes
- 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
- 'info_dict': {
- 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
- 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
- },
- 'playlist': [{
- 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
- 'info_dict': {
- 'id': '5311302538001',
- 'ext': 'mp4',
- 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
- 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
- 'timestamp': 1486321708,
- 'upload_date': '20170205',
- 'uploader_id': '800000640001',
- },
- 'only_matching': True,
- }],
- },
- # ooyala video
- {
- 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
- 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
- 'info_dict': {
- 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
- 'ext': 'mp4',
- 'title': '2cc213299525360.mov', # that's what we get
- 'duration': 238.231,
- },
- 'add_ie': ['Ooyala'],
- },
- {
- # ooyala video embedded with http://player.ooyala.com/iframe.js
- 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
- 'info_dict': {
- 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
- 'ext': 'mp4',
- 'title': '"Steve Jobs: Man in the Machine" trailer',
- 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
- 'duration': 135.427,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'movie expired',
- },
- # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
- {
- 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
- 'info_dict': {
- 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
- 'ext': 'mp4',
- 'title': 'Steampunk Fest Comes to Honesdale',
- 'duration': 43.276,
- },
- 'params': {
- 'skip_download': True,
- }
- },
- # embed.ly video
- {
- 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
- 'info_dict': {
- 'id': '9ODmcdjQcHQ',
- 'ext': 'mp4',
- 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
- 'upload_date': '20140225',
- 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
- 'uploader': 'Tested',
- 'uploader_id': 'testedcom',
- },
- # No need to test YoutubeIE here
- 'params': {
- 'skip_download': True,
- },
- },
- # funnyordie embed
- {
- 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
- 'info_dict': {
- 'id': '18e820ec3f',
- 'ext': 'mp4',
- 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
- 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
- },
- # HEAD requests lead to endless 301, while GET is OK
- 'expected_warnings': ['301'],
- },
- # RUTV embed
- {
- 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
- 'info_dict': {
- 'id': '776940',
- 'ext': 'mp4',
- 'title': 'Охотское море стало целиком российским',
- 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- # TVC embed
- {
- 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
- 'info_dict': {
- 'id': '55304',
- 'ext': 'mp4',
- 'title': 'Дошкольное воспитание',
- },
- },
- # SportBox embed
- {
- 'url': 'http://www.vestifinance.ru/articles/25753',
- 'info_dict': {
- 'id': '25753',
- 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '370908',
- 'title': 'Госзаказ. День 3',
- 'ext': 'mp4',
- }
- }, {
- 'info_dict': {
- 'id': '370905',
- 'title': 'Госзаказ. День 2',
- 'ext': 'mp4',
- }
- }, {
- 'info_dict': {
- 'id': '370902',
- 'title': 'Госзаказ. День 1',
- 'ext': 'mp4',
- }
- }],
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- # Myvi.ru embed
- {
- 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
- 'info_dict': {
- 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
- 'ext': 'mp4',
- 'title': 'Ужастики, русский трейлер (2015)',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 153,
- }
- },
- # XHamster embed
- {
- 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
- 'info_dict': {
- 'id': 'showthread',
- 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
- },
- 'playlist_mincount': 7,
-            # This forum does not allow <iframe> syntax anymore
- # Now HTML tags are displayed as-is
- 'skip': 'No videos on this page',
- },
- # Embedded TED video
- {
- 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
- 'md5': '65fdff94098e4a607385a60c5177c638',
- 'info_dict': {
- 'id': '1969',
- 'ext': 'mp4',
- 'title': 'Hidden miracles of the natural world',
- 'uploader': 'Louie Schwartzberg',
- 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
- }
- },
- # nowvideo embed hidden behind percent encoding
- {
- 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
- 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
- 'info_dict': {
- 'id': '06e53103ca9aa',
- 'ext': 'flv',
- 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
- 'description': 'No description',
- },
- },
- # arte embed
- {
- 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
- 'md5': '7653032cbb25bf6c80d80f217055fa43',
- 'info_dict': {
- 'id': '048195-004_PLUS7-F',
- 'ext': 'flv',
- 'title': 'X:enius',
- 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
- 'upload_date': '20140320',
- },
- 'params': {
- 'skip_download': 'Requires rtmpdump'
- },
- 'skip': 'video gone',
- },
- # francetv embed
- {
- 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
- 'info_dict': {
- 'id': 'EV_30231',
- 'ext': 'mp4',
- 'title': 'Alcaline, le concert avec Calogero',
- 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
- 'upload_date': '20150226',
- 'timestamp': 1424989860,
- 'duration': 5400,
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- 'expected_warnings': [
- 'Forbidden'
- ]
- },
- # Condé Nast embed
- {
- 'url': 'http://www.wired.com/2014/04/honda-asimo/',
- 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
- 'info_dict': {
- 'id': '53501be369702d3275860000',
- 'ext': 'mp4',
- 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
- }
- },
- # Dailymotion embed
- {
- 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
- 'md5': '441aeeb82eb72c422c7f14ec533999cd',
- 'info_dict': {
- 'id': 'k2mm4bCdJ6CQ2i7c8o2',
- 'ext': 'mp4',
- 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
- 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
- 'uploader': 'Spi0n',
- 'uploader_id': 'xgditw',
- 'upload_date': '20140425',
- 'timestamp': 1398441542,
- },
- 'add_ie': ['Dailymotion'],
- },
- # DailyMail embed
- {
- 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
- 'info_dict': {
- 'id': '1495629',
- 'ext': 'mp4',
- 'title': 'Care worker punches elderly dementia patient in head 11 times',
- 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
- },
- 'add_ie': ['DailyMail'],
- 'params': {
- 'skip_download': True,
- },
- },
- # YouTube embed
- {
- 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
- 'info_dict': {
- 'id': 'FXRb4ykk4S0',
- 'ext': 'mp4',
- 'title': 'The NBL Auction 2014',
- 'uploader': 'BADMINTON England',
- 'uploader_id': 'BADMINTONEvents',
- 'upload_date': '20140603',
- 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
- },
- 'add_ie': ['Youtube'],
- 'params': {
- 'skip_download': True,
- }
- },
-        # MTVServices embed
- {
- 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
- 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
- 'info_dict': {
- 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
- 'ext': 'mp4',
- 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
- 'description': 'Two valets share their love for movie star Liam Neesons.',
- 'timestamp': 1349922600,
- 'upload_date': '20121011',
- },
- },
- # YouTube embed via <data-embed-url="">
- {
- 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
- 'info_dict': {
- 'id': '4vAffPZIT44',
- 'ext': 'mp4',
- 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
- 'uploader': 'Gameloft',
- 'uploader_id': 'gameloft',
- 'upload_date': '20140828',
- 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
- },
- 'params': {
- 'skip_download': True,
- }
- },
- # YouTube <object> embed
- {
- 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
- 'md5': '516718101ec834f74318df76259fb3cc',
- 'info_dict': {
- 'id': 'msN87y-iEx0',
- 'ext': 'webm',
- 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
- 'upload_date': '20080526',
- 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
- 'uploader': 'Christopher Sykes',
- 'uploader_id': 'ChristopherJSykes',
- },
- 'add_ie': ['Youtube'],
- },
- # Camtasia studio
- {
- 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
- 'playlist': [{
- 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
- 'info_dict': {
- 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
- 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
- 'ext': 'flv',
- 'duration': 2235.90,
- }
- }, {
- 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
- 'info_dict': {
- 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
- 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
- 'ext': 'flv',
- 'duration': 2235.93,
- }
- }],
- 'info_dict': {
- 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
- }
- },
- # Flowplayer
- {
- 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
- 'md5': '9d65602bf31c6e20014319c7d07fba27',
- 'info_dict': {
- 'id': '5123ea6d5e5a7',
- 'ext': 'mp4',
- 'age_limit': 18,
- 'uploader': 'www.handjobhub.com',
- 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
- }
- },
- # Multiple brightcove videos
- # https://github.com/ytdl-org/youtube-dl/issues/2283
- {
- 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
- 'info_dict': {
- 'id': 'always-never',
- 'title': 'Always / Never - The New Yorker',
- },
- 'playlist_count': 3,
- 'params': {
- 'extract_flat': False,
- 'skip_download': True,
- }
- },
- # MLB embed
- {
- 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
- 'md5': '96f09a37e44da40dd083e12d9a683327',
- 'info_dict': {
- 'id': '33322633',
- 'ext': 'mp4',
- 'title': 'Ump changes call to ball',
- 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
- 'duration': 48,
- 'timestamp': 1401537900,
- 'upload_date': '20140531',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- },
- # Wistia embed
- {
- 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
- 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
- 'info_dict': {
- 'id': '6e2wtrbdaf',
- 'ext': 'mov',
- 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
- 'description': 'a Paywall Videos video from Remilon',
- 'duration': 644.072,
- 'uploader': 'study.com',
- 'timestamp': 1459678540,
- 'upload_date': '20160403',
- 'filesize': 24687186,
- },
- },
- {
- 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
- 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
- 'info_dict': {
- 'id': 'uxjb0lwrcz',
- 'ext': 'mp4',
- 'title': 'Conversation about Hexagonal Rails Part 1',
- 'description': 'a Martin Fowler video from ThoughtWorks',
- 'duration': 1715.0,
- 'uploader': 'thoughtworks.wistia.com',
- 'timestamp': 1401832161,
- 'upload_date': '20140603',
- },
- },
- # Wistia standard embed (async)
- {
- 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
- 'info_dict': {
- 'id': '807fafadvk',
- 'ext': 'mp4',
- 'title': 'Drip Brennan Dunn Workshop',
- 'description': 'a JV Webinars video from getdrip-1',
- 'duration': 4986.95,
- 'timestamp': 1463607249,
- 'upload_date': '20160518',
- },
- 'params': {
- 'skip_download': True,
- }
- },
- # Soundcloud embed
- {
- 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
- 'info_dict': {
- 'id': '174391317',
- 'ext': 'mp3',
- 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
- 'uploader': 'Sophos Security',
- 'title': 'Chet Chat 171 - Oct 29, 2014',
- 'upload_date': '20141029',
- }
- },
- # Soundcloud multiple embeds
- {
- 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
- 'info_dict': {
- 'id': '52809',
- 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
- },
- 'playlist_mincount': 7,
- },
- # TuneIn station embed
- {
- 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
- 'info_dict': {
- 'id': '204146',
- 'ext': 'mp3',
- 'title': 'CNRV',
- 'location': 'Paris, France',
- 'is_live': True,
- },
- 'params': {
- # Live stream
- 'skip_download': True,
- },
- },
- # Livestream embed
- {
- 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
- 'info_dict': {
- 'id': '67864563',
- 'ext': 'flv',
- 'upload_date': '20141112',
- 'title': 'Rosetta #CometLanding webcast HL 10',
- }
- },
- # Another Livestream embed, without 'new.' in URL
- {
- 'url': 'https://www.freespeech.org/',
- 'info_dict': {
- 'id': '123537347',
- 'ext': 'mp4',
- 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- },
- 'params': {
- # Live stream
- 'skip_download': True,
- },
- },
- # LazyYT
- {
- 'url': 'https://skiplagged.com/',
- 'info_dict': {
- 'id': 'skiplagged',
- 'title': 'Skiplagged: The smart way to find cheap flights',
- },
- 'playlist_mincount': 1,
- 'add_ie': ['Youtube'],
- },
- # Cinchcast embed
- {
- 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
- 'info_dict': {
- 'id': '7141703',
- 'ext': 'mp3',
- 'upload_date': '20141126',
- 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
- }
- },
- # Cinerama player
- {
- 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
- 'info_dict': {
- 'id': '730m_DandD_1901_512k',
- 'ext': 'mp4',
- 'uploader': 'www.abc.net.au',
- 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
- }
- },
- # embedded viddler video
- {
- 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
- 'info_dict': {
- 'id': '4d03aad9',
- 'ext': 'mp4',
- 'uploader': 'deadspin',
- 'title': 'WALL-TO-GORTAT',
- 'timestamp': 1422285291,
- 'upload_date': '20150126',
- },
- 'add_ie': ['Viddler'],
- },
- # Libsyn embed
- {
- 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
- 'info_dict': {
- 'id': '3377616',
- 'ext': 'mp3',
- 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
- 'description': 'md5:601cb790edd05908957dae8aaa866465',
- 'upload_date': '20150220',
- },
- 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
- },
- # jwplayer YouTube
- {
- 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
- 'info_dict': {
- 'id': 'Mrj4DVp2zeA',
- 'ext': 'mp4',
- 'upload_date': '20150212',
- 'uploader': 'The National Archives UK',
- 'description': 'md5:8078af856dca76edc42910b61273dbbf',
- 'uploader_id': 'NationalArchives08',
- 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
- },
- },
- # jwplayer rtmp
- {
- 'url': 'http://www.suffolk.edu/sjc/live.php',
- 'info_dict': {
- 'id': 'live',
- 'ext': 'flv',
- 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
- 'uploader': 'www.suffolk.edu',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
- },
- # Complex jwplayer
- {
- 'url': 'http://www.indiedb.com/games/king-machine/videos',
- 'info_dict': {
- 'id': 'videos',
- 'ext': 'mp4',
- 'title': 'king machine trailer 1',
- 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- },
- {
- # JWPlayer config passed as variable
- 'url': 'http://www.txxx.com/videos/3326530/ariele/',
- 'info_dict': {
- 'id': '3326530_hq',
- 'ext': 'mp4',
- 'title': 'ARIELE | Tube Cup',
- 'uploader': 'www.txxx.com',
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- }
- },
- {
- # JWPlatform iframe
- 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
- 'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
- 'info_dict': {
- 'id': 'O0c5JcKT',
- 'ext': 'mp4',
- 'upload_date': '20171122',
- 'timestamp': 1511366290,
- 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
- },
- 'add_ie': [JWPlatformIE.ie_key()],
- },
- {
- # Video.js embed, multiple formats
- 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
- 'info_dict': {
- 'id': 'yygqldloqIk',
- 'ext': 'mp4',
- 'title': 'SolidWorks. Урок 6 Настройка чертежа',
- 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
- 'upload_date': '20130314',
- 'uploader': 'PROстое3D',
- 'uploader_id': 'PROstoe3D',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Video.js embed, single format
- 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
- 'info_dict': {
- 'id': 'watch',
- 'ext': 'mp4',
- 'title': 'Step 1 - Good Foundation',
- 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # rtl.nl embed
- {
- 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
- 'playlist_mincount': 5,
- 'info_dict': {
- 'id': 'aanslagen-kopenhagen',
- 'title': 'Aanslagen Kopenhagen',
- }
- },
- # Zapiks embed
- {
- 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
- 'info_dict': {
- 'id': '118046',
- 'ext': 'mp4',
- 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
- }
- },
- # Kaltura embed (different embed code)
- {
- 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
- 'info_dict': {
- 'id': '1_a52wc67y',
- 'ext': 'flv',
- 'upload_date': '20150127',
- 'uploader_id': 'PremierMedia',
- 'timestamp': int,
- 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
- },
- },
- # Kaltura embed with single quotes
- {
- 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
- 'info_dict': {
- 'id': '0_izeg5utt',
- 'ext': 'mp4',
- 'title': '35871',
- 'timestamp': 1355743100,
- 'upload_date': '20121217',
- 'uploader_id': 'cplapp@learn360.com',
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # Kaltura embedded via quoted entry_id
- 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
- 'info_dict': {
- 'id': '0_utuok90b',
- 'ext': 'mp4',
- 'title': '06_matthew_brender_raj_dutt',
- 'timestamp': 1466638791,
- 'upload_date': '20160622',
- },
- 'add_ie': ['Kaltura'],
- 'expected_warnings': [
- 'Could not send HEAD request'
- ],
- 'params': {
- 'skip_download': True,
- }
- },
- {
- # Kaltura embedded, some fileExt broken (#11480)
- 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
- 'info_dict': {
- 'id': '1_sgtvehim',
- 'ext': 'mp4',
- 'title': 'Our "Standard Models" of particle physics and cosmology',
- 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
- 'timestamp': 1321158993,
- 'upload_date': '20111113',
- 'uploader_id': 'kps1',
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # Kaltura iframe embed
- 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
- 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
- 'info_dict': {
- 'id': '0_f2cfbpwy',
- 'ext': 'mp4',
- 'title': 'I. M. Pei: A Centennial Celebration',
- 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
- 'upload_date': '20170403',
- 'uploader_id': 'batchUser',
- 'timestamp': 1491232186,
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # Kaltura iframe embed, more sophisticated
- 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
- 'info_dict': {
- 'id': '1_9gzouybz',
- 'ext': 'mp4',
- 'title': 'lecture-05sep2017',
- 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
- 'upload_date': '20170913',
- 'uploader_id': 'eps2',
- 'timestamp': 1505340777,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # meta twitter:player
- 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
- 'info_dict': {
- 'id': '0_01b42zps',
- 'ext': 'mp4',
- 'title': 'Main Twerk (Video)',
- 'upload_date': '20171208',
- 'uploader_id': 'sebastian.salinas@thechive.com',
- 'timestamp': 1512713057,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Kaltura'],
- },
- # referrer protected EaglePlatform embed
- {
- 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
- 'info_dict': {
- 'id': '582306',
- 'ext': 'mp4',
- 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 3382,
- 'view_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # ClipYou (EaglePlatform) embed (custom URL)
- {
- 'url': 'http://muz-tv.ru/play/7129/',
- # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
- 'info_dict': {
- 'id': '12820',
- 'ext': 'mp4',
- 'title': "'O Sole Mio",
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 216,
- 'view_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is unavailable.',
- },
- # Pladform embed
- {
- 'url': 'http://muz-tv.ru/kinozal/view/7400/',
- 'info_dict': {
- 'id': '100183293',
- 'ext': 'mp4',
- 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
- 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 694,
- 'age_limit': 0,
- },
- 'skip': 'HTTP Error 404: Not Found',
- },
- # Playwire embed
- {
- 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
- 'info_dict': {
- 'id': '3519514',
- 'ext': 'mp4',
- 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
- 'thumbnail': r're:^https?://.*\.png$',
- 'duration': 45.115,
- },
- },
- # 5min embed
- {
- 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
- 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
- 'info_dict': {
- 'id': '518726732',
- 'ext': 'mp4',
- 'title': 'Facebook Creates "On This Day" | Crunch Report',
- 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
- 'timestamp': 1427237531,
- 'uploader': 'Crunch Report',
- 'upload_date': '20150324',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- # Crooks and Liars embed
- {
- 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
- 'info_dict': {
- 'id': '8RUoRhRi',
- 'ext': 'mp4',
- 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
- 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
- 'timestamp': 1428207000,
- 'upload_date': '20150405',
- 'uploader': 'Heather',
- },
- },
- # Crooks and Liars external embed
- {
- 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
- 'info_dict': {
- 'id': 'MTE3MjUtMzQ2MzA',
- 'ext': 'mp4',
- 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
- 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
- 'timestamp': 1265032391,
- 'upload_date': '20100201',
- 'uploader': 'Heather',
- },
- },
- # NBC Sports vplayer embed
- {
- 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
- 'info_dict': {
- 'id': 'ln7x1qSThw4k',
- 'ext': 'flv',
- 'title': "PFT Live: New leader in the 'new-look' defense",
- 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
- 'uploader': 'NBCU-SPORTS',
- 'upload_date': '20140107',
- 'timestamp': 1389118457,
- },
- 'skip': 'Invalid Page URL',
- },
- # NBC News embed
- {
- 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
- 'md5': '1aa589c675898ae6d37a17913cf68d66',
- 'info_dict': {
- 'id': 'x_dtl_oa_LettermanliftPR_160608',
- 'ext': 'mp4',
- 'title': 'David Letterman: A Preview',
- 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
- 'upload_date': '20160609',
- 'timestamp': 1465431544,
- 'uploader': 'NBCU-NEWS',
- },
- },
- # UDN embed
- {
- 'url': 'https://video.udn.com/news/300346',
- 'md5': 'fd2060e988c326991037b9aff9df21a6',
- 'info_dict': {
- 'id': '300346',
- 'ext': 'mp4',
- 'title': '中一中男師變性 全校師生力挺',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'expected_warnings': ['Failed to parse JSON Expecting value'],
- },
- # Brightcove URL in single quotes
- {
- 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
- 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
- 'info_dict': {
- 'id': '4255764656001',
- 'ext': 'mp4',
- 'title': 'SN Presents: Russell Martin, World Citizen',
- 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
- 'uploader': 'Rogers Sportsnet',
- 'uploader_id': '1704050871',
- 'upload_date': '20150525',
- 'timestamp': 1432570283,
- },
- },
- # OnionStudios embed
- {
- 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
- 'info_dict': {
- 'id': '2855',
- 'ext': 'mp4',
- 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
- 'thumbnail': r're:^https?://.*\.jpe?g$',
- 'uploader': 'ClickHole',
- 'uploader_id': 'clickhole',
- }
- },
- # SnagFilms embed
- {
- 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
- 'info_dict': {
- 'id': '74849a00-85a9-11e1-9660-123139220831',
- 'ext': 'mp4',
- 'title': '#whilewewatch',
- }
- },
- # AdobeTVVideo embed
- {
- 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
- 'md5': '43662b577c018ad707a63766462b1e87',
- 'info_dict': {
- 'id': '2456',
- 'ext': 'mp4',
- 'title': 'New experience with Acrobat DC',
- 'description': 'New experience with Acrobat DC',
- 'duration': 248.667,
- },
- },
- # BrightcoveInPageEmbed embed
- {
- 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
- 'info_dict': {
- 'id': '4238694884001',
- 'ext': 'flv',
- 'title': 'Tabletop: Dread, Last Thoughts',
- 'description': 'Tabletop: Dread, Last Thoughts',
- 'duration': 51690,
- },
- },
- # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
- # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
- {
- 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
- 'info_dict': {
- 'id': '4785848093001',
- 'ext': 'mp4',
- 'title': 'The Cardinal Pell Interview',
- 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
- 'uploader': 'GlobeCast Australia - GlobeStream',
- 'uploader_id': '2733773828001',
- 'upload_date': '20160304',
- 'timestamp': 1457083087,
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- },
- {
- # Brightcove embed with whitespace around attribute names
- 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
- 'info_dict': {
- 'id': '3167554373001',
- 'ext': 'mp4',
- 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
- 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
- 'uploader_id': '1079349493',
- 'upload_date': '20140207',
- 'timestamp': 1391810548,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # Another form of arte.tv embed
- {
- 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
- 'md5': '850bfe45417ddf221288c88a0cffe2e2',
- 'info_dict': {
- 'id': '030273-562_PLUS7-F',
- 'ext': 'mp4',
- 'title': 'ARTE Reportage - Nulle part, en France',
- 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
- 'upload_date': '20160409',
- },
- },
- # LiveLeak embed
- {
- 'url': 'http://www.wykop.pl/link/3088787/',
- 'md5': '7619da8c820e835bef21a1efa2a0fc71',
- 'info_dict': {
- 'id': '874_1459135191',
- 'ext': 'mp4',
- 'title': 'Man shows poor quality of new apartment building',
- 'description': 'The wall is like a sand pile.',
- 'uploader': 'Lake8737',
- },
- 'add_ie': [LiveLeakIE.ie_key()],
- },
- # Another LiveLeak embed pattern (#13336)
- {
- 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
- 'info_dict': {
- 'id': '2eb_1496309988',
- 'ext': 'mp4',
- 'title': 'Thief robs place where everyone was armed',
- 'description': 'md5:694d73ee79e535953cf2488562288eee',
- 'uploader': 'brazilwtf',
- },
- 'add_ie': [LiveLeakIE.ie_key()],
- },
- # Duplicated embedded video URLs
- {
- 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
- 'info_dict': {
- 'id': '149298443_480_16c25b74_2',
- 'ext': 'mp4',
- 'title': 'vs. Blue Orange Spring Game',
- 'uploader': 'www.hudl.com',
- },
- },
- # twitter:player:stream embed
- {
- 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
- 'info_dict': {
- 'id': 'master',
- 'ext': 'mp4',
- 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
- 'uploader': 'www.rtl.be',
- },
- 'params': {
- # m3u8 downloads
- 'skip_download': True,
- },
- },
- # twitter:player embed
- {
- 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
- 'md5': 'a3e0df96369831de324f0778e126653c',
- 'info_dict': {
- 'id': '4909620399001',
- 'ext': 'mp4',
- 'title': 'What Do Black Holes Sound Like?',
- 'description': 'what do black holes sound like',
- 'upload_date': '20160524',
- 'uploader_id': '29913724001',
- 'timestamp': 1464107587,
- 'uploader': 'TheAtlantic',
- },
- 'add_ie': ['BrightcoveLegacy'],
- },
- # Facebook <iframe> embed
- {
- 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
- 'md5': 'fbcde74f534176ecb015849146dd3aee',
- 'info_dict': {
- 'id': '599637780109885',
- 'ext': 'mp4',
- 'title': 'Facebook video #599637780109885',
- },
- },
- # Facebook <iframe> embed, plugin video
- {
- 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
- 'info_dict': {
- 'id': '1754168231264132',
- 'ext': 'mp4',
- 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
- 'uploader': 'Tariq Ramadan (official)',
- 'timestamp': 1496758379,
- 'upload_date': '20170606',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # Facebook API embed
- {
- 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
- 'md5': 'a47372ee61b39a7b90287094d447d94e',
- 'info_dict': {
- 'id': '10153467542406923',
- 'ext': 'mp4',
- 'title': 'Facebook video #10153467542406923',
- },
- },
- # Wordpress "YouTube Video Importer" plugin
- {
- 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
- 'md5': 'd16797741b560b485194eddda8121b48',
- 'info_dict': {
- 'id': 'HNTXWDXV9Is',
- 'ext': 'mp4',
- 'title': 'Blue Devils Drumline Stanford lot 2016',
- 'upload_date': '20160627',
- 'uploader_id': 'GENOCIDE8GENERAL10',
- 'uploader': 'cylus cyrus',
- },
- },
- {
- # video stored on custom kaltura server
- 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
- 'md5': '537617d06e64dfed891fa1593c4b30cc',
- 'info_dict': {
- 'id': '0_1iotm5bh',
- 'ext': 'mp4',
- 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
- 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
- 'uploader_id': 'videos.expansion@el-mundo.net',
- 'upload_date': '20150429',
- 'timestamp': 1430303472,
- },
- 'add_ie': ['Kaltura'],
- },
- {
- # Non-standard Vimeo embed
- 'url': 'https://openclassrooms.com/courses/understanding-the-web',
- 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
- 'info_dict': {
- 'id': '148867247',
- 'ext': 'mp4',
- 'title': 'Understanding the web - Teaser',
- 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
- 'upload_date': '20151214',
- 'uploader': 'OpenClassrooms',
- 'uploader_id': 'openclassrooms',
- },
- 'add_ie': ['Vimeo'],
- },
- {
- # generic vimeo embed that requires original URL passed as Referer
- 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
- 'only_matching': True,
- },
- {
- 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
- 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
- 'info_dict': {
- 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
- 'ext': 'mp4',
- 'title': 'Big Buck Bunny',
- 'description': 'Royalty free test video',
- 'timestamp': 1432816365,
- 'upload_date': '20150528',
- 'is_live': False,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [ArkenaIE.ie_key()],
- },
- {
- 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
- 'info_dict': {
- 'id': '1c7141f46c',
- 'ext': 'mp4',
- 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [Vbox7IE.ie_key()],
- },
- {
- # DBTV embeds
- 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
- 'info_dict': {
- 'id': '43254897',
- 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
- },
- 'playlist_mincount': 3,
- },
- {
- # Videa embeds
- 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
- 'info_dict': {
- 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
- 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
- },
- 'playlist_mincount': 2,
- },
- {
- # 20 minuten embed
- 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
- 'info_dict': {
- 'id': '523629',
- 'ext': 'mp4',
- 'title': 'So kommen Sie bei Eis und Schnee sicher an',
- 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [TwentyMinutenIE.ie_key()],
- },
- {
- # VideoPress embed
- 'url': 'https://en.support.wordpress.com/videopress/',
- 'info_dict': {
- 'id': 'OcobLTqC',
- 'ext': 'm4v',
- 'title': 'IMG_5786',
- 'timestamp': 1435711927,
- 'upload_date': '20150701',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [VideoPressIE.ie_key()],
- },
- {
- # Rutube embed
- 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
- 'info_dict': {
- 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
- 'ext': 'flv',
- 'title': 'Магаззино: Казань 2',
- 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
- 'uploader': 'Магаззино',
- 'upload_date': '20170228',
- 'uploader_id': '996642',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [RutubeIE.ie_key()],
- },
- {
- # ThePlatform embedded with whitespaces in URLs
- 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
- 'only_matching': True,
- },
- {
- # Senate ISVP iframe https
- 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
- 'md5': 'fb8c70b0b515e5037981a2492099aab8',
- 'info_dict': {
- 'id': 'govtaff020316',
- 'ext': 'mp4',
- 'title': 'Integrated Senate Video Player',
- },
- 'add_ie': [SenateISVPIE.ie_key()],
- },
- {
- # Limelight embeds (1 channel embed + 4 media embeds)
- 'url': 'http://www.sedona.com/FacilitatorTraining2017',
- 'info_dict': {
- 'id': 'FacilitatorTraining2017',
- 'title': 'Facilitator Training 2017',
- },
- 'playlist_mincount': 5,
- },
- {
- # Limelight embed (LimelightPlayerUtil.embed)
- 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
- 'info_dict': {
- 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
- 'ext': 'mp4',
- 'title': '07448641',
- 'timestamp': 1499890639,
- 'upload_date': '20170712',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['LimelightMedia'],
- },
- {
- 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
- 'info_dict': {
- 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
- 'title': 'Standoff with Walnut Creek murder suspect ends',
- 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
- },
- 'playlist_mincount': 4,
- },
- {
- # WashingtonPost embed
- 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
- 'info_dict': {
- 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
- 'ext': 'mp4',
- 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
- 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
- 'timestamp': 1455216756,
- 'uploader': 'The Washington Post',
- 'upload_date': '20160211',
- },
- 'add_ie': [WashingtonPostIE.ie_key()],
- },
- {
- # Mediaset embed
- 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
- 'info_dict': {
- 'id': '720642',
- 'ext': 'mp4',
- 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [MediasetIE.ie_key()],
- },
- {
- # JOJ.sk embeds
- 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
- 'info_dict': {
- 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
- 'title': 'Slovenskom sa prehnala vlna silných búrok',
- },
- 'playlist_mincount': 5,
- 'add_ie': [JojIE.ie_key()],
- },
- {
- # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
- 'url': 'https://tvrain.ru/amp/418921/',
- 'md5': 'cc00413936695987e8de148b67d14f1d',
- 'info_dict': {
- 'id': '418921',
- 'ext': 'mp4',
- 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
- },
- },
- {
- # vzaar embed
- 'url': 'http://help.vzaar.com/article/165-embedding-video',
- 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
- 'info_dict': {
- 'id': '8707641',
- 'ext': 'mp4',
- 'title': 'Building A Business Online: Principal Chairs Q & A',
- },
- },
- {
- # multiple HTML5 videos on one page
- 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
- 'info_dict': {
- 'id': 'keyscenarios',
- 'title': 'Rescue Kit 14 Free Edition - Getting started',
- },
- 'playlist_count': 4,
- },
- {
- # vshare embed
- 'url': 'https://youtube-dl-demo.neocities.org/vshare.html',
- 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
- 'info_dict': {
- 'id': '0f64ce6',
- 'title': 'vl14062007715967',
- 'ext': 'mp4',
- }
- },
- {
- 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
- 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
- 'info_dict': {
- 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
- 'ext': 'mp4',
- 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
- 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
- 'timestamp': 1474354800,
- 'upload_date': '20160920',
- }
- },
- {
- 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
- 'info_dict': {
- 'id': '1731611',
- 'ext': 'mp4',
- 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
- 'description': 'md5:eb5f23826a027ba95277d105f248b825',
- 'timestamp': 1516100691,
- 'upload_date': '20180116',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [SpringboardPlatformIE.ie_key()],
- },
- {
- 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
- 'info_dict': {
- 'id': 'uPDB5I9wfp8',
- 'ext': 'webm',
- 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
- 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
- 'upload_date': '20160219',
- 'uploader': 'Pocoyo - Português (BR)',
- 'uploader_id': 'PocoyoBrazil',
- },
- 'add_ie': [YoutubeIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
- 'info_dict': {
- 'id': 'vMDE4NzI1Mjgt690b',
- 'ext': 'mp4',
- 'title': 'Котята',
- },
- 'add_ie': [YapFilesIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # CloudflareStream embed
- 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
- 'info_dict': {
- 'id': '31c9291ab41fac05471db4e73aa11717',
- 'ext': 'mp4',
- 'title': '31c9291ab41fac05471db4e73aa11717',
- },
- 'add_ie': [CloudflareStreamIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # PeerTube embed
- 'url': 'https://joinpeertube.org/fr/home/',
- 'info_dict': {
- 'id': 'home',
- 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
- },
- 'playlist_count': 2,
- },
- {
- # Indavideo embed
- 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
- 'info_dict': {
- 'id': '1693903',
- 'ext': 'mp4',
- 'title': 'Így kell otthon hamburgert sütni',
- 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
- 'timestamp': 1426330212,
- 'upload_date': '20150314',
- 'uploader': 'StreetKitchen',
- 'uploader_id': '546363',
- },
- 'add_ie': [IndavideoEmbedIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # APA embed via JWPlatform embed
- 'url': 'http://www.vol.at/blue-man-group/5593454',
- 'info_dict': {
- 'id': 'jjv85FdZ',
- 'ext': 'mp4',
- 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
- 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 254,
- 'timestamp': 1519211149,
- 'upload_date': '20180221',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
- 'md5': 'b68d276de422ab07ee1d49388103f457',
- 'info_dict': {
- 'id': '83645793',
- 'title': 'Lock up and get excited',
- 'ext': 'mp4'
- },
- 'skip': 'TODO: fix nested playlists processing in tests',
- },
- {
- # Viqeo embeds
- 'url': 'https://viqeo.tv/',
- 'info_dict': {
- 'id': 'viqeo',
- 'title': 'All-new video platform',
- },
- 'playlist_count': 6,
- },
- {
- # Squarespace video embed, 2019-08-28
- 'url': 'http://ootboxford.com',
- 'info_dict': {
- 'id': 'Tc7b_JGdZfw',
- 'title': 'Out of the Blue, at Childish Things 10',
- 'ext': 'mp4',
- 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
- 'uploader_id': 'helendouglashouse',
- 'uploader': 'Helen & Douglas House',
- 'upload_date': '20140328',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Zype embed
- 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
- 'info_dict': {
- 'id': '5b400b834b32992a310622b9',
- 'ext': 'mp4',
- 'title': 'Smoky Barbecue Favorites',
- 'thumbnail': r're:^https?://.*\.jpe?g',
- },
- 'add_ie': [ZypeIE.ie_key()],
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # videojs embed
- 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
- 'info_dict': {
- 'id': 'shell',
- 'ext': 'mp4',
- 'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
- 'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': ['Failed to download MPD manifest'],
- },
- {
- # DailyMotion embed with DM.player
- 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
- 'info_dict': {
- 'id': 'k6aKkGHd9FJs4mtJN39',
- 'ext': 'mp4',
- 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
- 'description': 'This video is private.',
- 'uploader_id': 'x1jf30l',
- 'uploader': 'beIN SPORTS USA',
- 'upload_date': '20190528',
- 'timestamp': 1559062971,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # {
- # # TODO: find another test
- # # http://schema.org/VideoObject
- # 'url': 'https://flipagram.com/f/nyvTSJMKId',
- # 'md5': '888dcf08b7ea671381f00fab74692755',
- # 'info_dict': {
- # 'id': 'nyvTSJMKId',
- # 'ext': 'mp4',
- # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
- # 'description': '#love for cats.',
- # 'timestamp': 1461244995,
- # 'upload_date': '20160421',
- # },
- # 'params': {
- # 'force_generic_extractor': True,
- # },
- # }
- ]
-
- def report_following_redirect(self, new_url):
- """Report information extraction."""
- self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
-
- def _extract_rss(self, url, video_id, doc):
- playlist_title = doc.find('./channel/title').text
- playlist_desc_el = doc.find('./channel/description')
- playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
-
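- # Each <item> may carry its media URL either in an <enclosure url="...">
- # attribute or in a plain <link> element; the enclosure is preferred,
- # since <link> usually points at an HTML page rather than at the media.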
- entries = []
- for it in doc.findall('./channel/item'):
- next_url = None
- enclosure_nodes = it.findall('./enclosure')
- for e in enclosure_nodes:
- next_url = e.attrib.get('url')
- if next_url:
- break
-
- if not next_url:
- next_url = xpath_text(it, 'link', fatal=False)
-
- if not next_url:
- continue
-
- entries.append({
- '_type': 'url_transparent',
- 'url': next_url,
- 'title': it.find('title').text,
- })
-
- return {
- '_type': 'playlist',
- 'id': url,
- 'title': playlist_title,
- 'description': playlist_desc,
- 'entries': entries,
- }
-
- def _extract_camtasia(self, url, video_id, webpage):
- """ Returns None if no camtasia video can be found. """
-
- camtasia_cfg = self._search_regex(
- r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
- webpage, 'camtasia configuration file', default=None)
- if camtasia_cfg is None:
- return None
-
- title = self._html_search_meta('DC.title', webpage, fatal=True)
-
- camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
- camtasia_cfg = self._download_xml(
- camtasia_url, video_id,
- note='Downloading camtasia configuration',
- errnote='Failed to download camtasia configuration')
- fileset_node = camtasia_cfg.find('./playlist/array/fileset')
-
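- # The fileset node lists one child per track of the recording (e.g. the
- # main screen capture plus a picture-in-picture camera feed), each with
- # its own <uri> and <duration>; every such track becomes a playlist entry.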
- entries = []
- for n in fileset_node:
- url_n = n.find('./uri')
- if url_n is None:
- continue
-
- entries.append({
- 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
- 'title': '%s - %s' % (title, n.tag),
- 'url': compat_urlparse.urljoin(url, url_n.text),
- 'duration': float_or_none(n.find('./duration').text),
- })
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'title': title,
- }
-
- def _real_extract(self, url):
- if url.startswith('//'):
- return self.url_result(self.http_scheme() + url)
-
- parsed_url = compat_urlparse.urlparse(url)
- if not parsed_url.scheme:
- default_search = self._downloader.params.get('default_search')
- if default_search is None:
- default_search = 'fixup_error'
-
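- # 'fixup_error' tries to fix up obviously protocol-less URLs and only
- # raises if the input does not look like a URL at all, while 'auto' and
- # 'auto_warning' additionally fall back to a YouTube search.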
- if default_search in ('auto', 'auto_warning', 'fixup_error'):
- if re.match(r'^[^\s/]+\.[^\s/]+/', url):
- self._downloader.report_warning('The URL doesn\'t specify the protocol, trying with http')
- return self.url_result('http://' + url)
- elif default_search != 'fixup_error':
- if default_search == 'auto_warning':
- if re.match(r'^(?:url|URL)$', url):
- raise ExtractorError(
- 'Invalid URL: %r . Call youtube-dl like this: youtube-dl -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
- expected=True)
- else:
- self._downloader.report_warning(
- 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
- return self.url_result('ytsearch:' + url)
-
- if default_search in ('error', 'fixup_error'):
- raise ExtractorError(
- '%r is not a valid URL. '
- 'Set --default-search "ytsearch" (or run youtube-dl "ytsearch:%s" ) to search YouTube'
- % (url, url), expected=True)
- else:
- if ':' not in default_search:
- default_search += ':'
- return self.url_result(default_search + url)
-
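- # Other extractors may hand over URLs with extra state "smuggled" into
- # them (e.g. a forced video id or the to_generic flag); unpack that
- # state before doing anything else with the URL.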
- url, smuggled_data = unsmuggle_url(url)
- force_videoid = None
- is_intentional = smuggled_data and smuggled_data.get('to_generic')
- if smuggled_data and 'force_videoid' in smuggled_data:
- force_videoid = smuggled_data['force_videoid']
- video_id = force_videoid
- else:
- video_id = self._generic_id(url)
-
- self.to_screen('%s: Requesting header' % video_id)
-
- head_req = HEADRequest(url)
- head_response = self._request_webpage(
- head_req, video_id,
- note=False, errnote='Could not send HEAD request to %s' % url,
- fatal=False)
-
- if head_response is not False:
- # Check for redirect
- new_url = compat_str(head_response.geturl())
- if url != new_url:
- self.report_following_redirect(new_url)
- if force_videoid:
- new_url = smuggle_url(
- new_url, {'force_videoid': force_videoid})
- return self.url_result(new_url)
-
- full_response = None
- if head_response is False:
- request = sanitized_Request(url)
- request.add_header('Accept-Encoding', '*')
- full_response = self._request_webpage(request, video_id)
- head_response = full_response
-
- info_dict = {
- 'id': video_id,
- 'title': self._generic_title(url),
- 'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
- }
-
- # Check for direct link to a video
- content_type = head_response.headers.get('Content-Type', '').lower()
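- # audio/*, video/* and the application/* subtypes used for Ogg and for
- # HLS playlists (vnd.apple.mpegurl, x-mpegurl, mpegurl) count as media.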
- m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
- if m:
- format_id = compat_str(m.group('format_id'))
- if format_id.endswith('mpegurl'):
- formats = self._extract_m3u8_formats(url, video_id, 'mp4')
- elif format_id == 'f4m':
- formats = self._extract_f4m_formats(url, video_id)
- else:
- formats = [{
- 'format_id': format_id,
- 'url': url,
- 'vcodec': 'none' if m.group('type') == 'audio' else None
- }]
- info_dict['direct'] = True
- self._sort_formats(formats)
- info_dict['formats'] = formats
- return info_dict
-
- if not self._downloader.params.get('test', False) and not is_intentional:
- force = self._downloader.params.get('force_generic_extractor', False)
- self._downloader.report_warning(
- '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
-
- if not full_response:
- request = sanitized_Request(url)
- # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
- # making it impossible to download only a chunk of the file (yet we need only 512kB to
- # test whether it's HTML or not). With youtube-dl's default Accept-Encoding this
- # would always result in downloading the whole file, which is not desirable.
- # Therefore, for the extraction pass we have to override Accept-Encoding to any in
- # order to accept raw bytes and be able to download only a chunk.
- # It might be better to solve this by checking Content-Type for application/octet-stream
- # after the HEAD request finishes, but it's unclear whether we can rely on that.
- request.add_header('Accept-Encoding', '*')
- full_response = self._request_webpage(request, video_id)
-
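- # Sniff only the first 512 bytes of the response; that is enough to
- # recognise an M3U playlist or to decide whether the content is HTML.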
- first_bytes = full_response.read(512)
-
- # Is it an M3U playlist?
- if first_bytes.startswith(b'#EXTM3U'):
- info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
- self._sort_formats(info_dict['formats'])
- return info_dict
-
- # Maybe it's a direct link to a video?
- # Be careful not to download the whole thing!
- if not is_html(first_bytes):
- self._downloader.report_warning(
- 'URL could be a direct video link, returning it as such.')
- info_dict.update({
- 'direct': True,
- 'url': url,
- })
- return info_dict
-
- webpage = self._webpage_read_content(
- full_response, url, video_id, prefix=first_bytes)
-
- self.report_extraction(video_id)
-
- # Is it an RSS feed, a SMIL file, an XSPF playlist or an MPD manifest?
- try:
- doc = compat_etree_fromstring(webpage.encode('utf-8'))
- if doc.tag == 'rss':
- return self._extract_rss(url, video_id, doc)
- elif doc.tag == 'SmoothStreamingMedia':
- info_dict['formats'] = self._parse_ism_formats(doc, url)
- self._sort_formats(info_dict['formats'])
- return info_dict
- elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
- smil = self._parse_smil(doc, url, video_id)
- self._sort_formats(smil['formats'])
- return smil
- elif doc.tag == '{http://xspf.org/ns/0/}playlist':
- return self.playlist_result(
- self._parse_xspf(
- doc, video_id, xspf_url=url,
- xspf_base_url=compat_str(full_response.geturl())),
- video_id)
- elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
- info_dict['formats'] = self._parse_mpd_formats(
- doc,
- mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
- mpd_url=url)
- self._sort_formats(info_dict['formats'])
- return info_dict
- elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
- info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
- self._sort_formats(info_dict['formats'])
- return info_dict
- except compat_xml_parse_error:
- pass
-
- # Is it a Camtasia project?
- camtasia_res = self._extract_camtasia(url, video_id, webpage)
- if camtasia_res is not None:
- return camtasia_res
-
- # Sometimes an embedded video player is hidden behind percent encoding
- # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448).
- # Unescaping the whole page allows us to handle those cases in a generic way.
- webpage = compat_urllib_parse_unquote(webpage)
-
- # Unescape Squarespace embeds so that the generic extractor can detect them,
- # see https://github.com/ytdl-org/youtube-dl/issues/21294
- webpage = re.sub(
- r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
- lambda x: unescapeHTML(x.group(0)), webpage)
-
- # it's tempting to parse this further, but you would
- # have to take into account all the variations like
- # Video Title - Site Name
- # Site Name | Video Title
- # Video Title - Tagline | Site Name
- # and so on and so forth; it's just not practical
- video_title = self._og_search_title(
- webpage, default=None) or self._html_search_regex(
- r'(?s)<title>(.*?)</title>', webpage, 'video title',
- default='video')
-
- # Try to detect age limit automatically
- age_limit = self._rta_search(webpage)
- # And then there are the jokers who advertise that they use RTA,
- # but actually don't.
- AGE_LIMIT_MARKERS = [
- r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
- ]
- if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
- age_limit = 18
-
- # video uploader is domain name
- video_uploader = self._search_regex(
- r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
-
- video_description = self._og_search_description(webpage, default=None)
- video_thumbnail = self._og_search_thumbnail(webpage, default=None)
-
- info_dict.update({
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'age_limit': age_limit,
- })
-
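- # From this point on, probe the page for known embed patterns, roughly
- # ordered by specificity; the first match wins and extraction is
- # delegated to the corresponding extractor.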
- # Look for Brightcove Legacy Studio embeds
- bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
- if bc_urls:
- entries = [{
- '_type': 'url',
- 'url': smuggle_url(bc_url, {'Referer': url}),
- 'ie_key': 'BrightcoveLegacy'
- } for bc_url in bc_urls]
-
- return {
- '_type': 'playlist',
- 'title': video_title,
- 'id': video_id,
- 'entries': entries,
- }
-
- # Look for Brightcove New Studio embeds
- bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
- if bc_urls:
- return self.playlist_from_matches(
- bc_urls, video_id, video_title,
- getter=lambda x: smuggle_url(x, {'referrer': url}),
- ie='BrightcoveNew')
-
- # Look for Nexx embeds
- nexx_urls = NexxIE._extract_urls(webpage)
- if nexx_urls:
- return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
-
- # Look for Nexx iFrame embeds
- nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
- if nexx_embed_urls:
- return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
-
- # Look for ThePlatform embeds
- tp_urls = ThePlatformIE._extract_urls(webpage)
- if tp_urls:
- return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
-
- # Look for embedded rtl.nl player
- matches = re.findall(
- r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
- webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
-
- vimeo_urls = VimeoIE._extract_urls(url, webpage)
- if vimeo_urls:
- return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
-
- vid_me_embed_url = self._search_regex(
- r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
- webpage, 'vid.me embed', default=None)
- if vid_me_embed_url is not None:
- return self.url_result(vid_me_embed_url, 'Vidme')
-
- # Look for YouTube embeds
- youtube_urls = YoutubeIE._extract_urls(webpage)
- if youtube_urls:
- return self.playlist_from_matches(
- youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
-
- matches = DailymotionIE._extract_urls(webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title)
-
- # Look for embedded Dailymotion playlist player (#3822)
- m = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
- if m:
- playlists = re.findall(
- r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
- if playlists:
- return self.playlist_from_matches(
- playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
-
- # Look for DailyMail embeds
- dailymail_urls = DailyMailIE._extract_urls(webpage)
- if dailymail_urls:
- return self.playlist_from_matches(
- dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
-
- # Look for embedded Wistia player
- wistia_url = WistiaIE._extract_url(webpage)
- if wistia_url:
- return {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(wistia_url),
- 'ie_key': WistiaIE.ie_key(),
- 'uploader': video_uploader,
- }
-
- # Look for SVT player
- svt_url = SVTIE._extract_url(webpage)
- if svt_url:
- return self.url_result(svt_url, 'SVT')
-
- # Look for Bandcamp pages with custom domain
- mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
- if mobj is not None:
- burl = unescapeHTML(mobj.group(1))
- # Don't set the extractor because it can be a track URL or an album
- return self.url_result(burl)
-
- # Look for embedded Vevo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for embedded Viddler player
- mobj = re.search(
- r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for NYTimes player
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Libsyn player
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for Ooyala videos
- mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
- or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
- or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
- or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
- or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
- if mobj is not None:
- embed_token = self._search_regex(
- r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
- webpage, 'ooyala embed token', default=None)
- return OoyalaIE._build_url_result(smuggle_url(
- mobj.group('ec'), {
- 'domain': url,
- 'embed_token': embed_token,
- }))
-
- # Look for multiple Ooyala embeds on SBN network websites
- mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
- if mobj is not None:
- embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
- if embeds:
- return self.playlist_from_matches(
- embeds, video_id, video_title,
- getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
-
- # Look for Aparat videos
- mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group(1), 'Aparat')
-
- # Look for MPORA videos
- mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group(1), 'Mpora')
-
- # Look for embedded Facebook player
- facebook_urls = FacebookIE._extract_urls(webpage)
- if facebook_urls:
- return self.playlist_from_matches(facebook_urls, video_id, video_title)
-
- # Look for embedded VK player
- mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'VK')
-
- # Look for embedded Odnoklassniki player
- mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Odnoklassniki')
-
- # Look for embedded ivi player
- mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Ivi')
-
- # Look for embedded Huffington Post player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'HuffPost')
-
- # Look for embed.ly
- mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
- mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
- if mobj is not None:
- return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
-
- # Look for funnyordie embed
- matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
- if matches:
- return self.playlist_from_matches(
- matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
-
- # Look for BBC iPlayer embed
- matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
- if matches:
- return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
-
- # Look for embedded RUTV player
- rutv_url = RUTVIE._extract_url(webpage)
- if rutv_url:
- return self.url_result(rutv_url, 'RUTV')
-
- # Look for embedded TVC player
- tvc_url = TVCIE._extract_url(webpage)
- if tvc_url:
- return self.url_result(tvc_url, 'TVC')
-
- # Look for embedded SportBox player
- sportbox_urls = SportBoxIE._extract_urls(webpage)
- if sportbox_urls:
- return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
-
- # Look for embedded XHamster player
- xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
- if xhamster_urls:
- return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
-
- # Look for embedded TNAFlixNetwork player
- tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
- if tnaflix_urls:
- return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
-
- # Look for embedded PornHub player
- pornhub_urls = PornHubIE._extract_urls(webpage)
- if pornhub_urls:
- return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
-
- # Look for embedded DrTuber player
- drtuber_urls = DrTuberIE._extract_urls(webpage)
- if drtuber_urls:
- return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
-
- # Look for embedded RedTube player
- redtube_urls = RedTubeIE._extract_urls(webpage)
- if redtube_urls:
- return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
-
- # Look for embedded Tube8 player
- tube8_urls = Tube8IE._extract_urls(webpage)
- if tube8_urls:
- return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
-
- # Look for embedded Tvigle player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Tvigle')
-
- # Look for embedded TED player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'TED')
-
- # Look for embedded Ustream videos
- ustream_url = UstreamIE._extract_url(webpage)
- if ustream_url:
- return self.url_result(ustream_url, UstreamIE.ie_key())
-
- # Look for embedded arte.tv player
- mobj = re.search(
- r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'ArteTVEmbed')
-
- # Look for embedded francetv player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for embedded smotri.com player
- smotri_url = SmotriIE._extract_url(webpage)
- if smotri_url:
- return self.url_result(smotri_url, 'Smotri')
-
- # Look for embedded Myvi.ru player
- myvi_url = MyviIE._extract_url(webpage)
- if myvi_url:
- return self.url_result(myvi_url)
-
- # Look for embedded soundcloud player
- soundcloud_urls = SoundcloudIE._extract_urls(webpage)
- if soundcloud_urls:
- return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
-
- # Look for tunein player
- tunein_urls = TuneInBaseIE._extract_urls(webpage)
- if tunein_urls:
- return self.playlist_from_matches(tunein_urls, video_id, video_title)
-
- # Look for embedded mtvservices player
- mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
- if mtvservices_url:
- return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
-
- # Look for embedded yahoo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Yahoo')
-
- # Look for embedded sbs.com.au player
- mobj = re.search(
- r'''(?x)
- (?:
- <meta\s+property="og:video"\s+content=|
- <iframe[^>]+?src=
- )
- (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'SBS')
-
- # Look for embedded Cinchcast player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Cinchcast')
-
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
- webpage)
- if not mobj:
- mobj = re.search(
- r'data-video-link=["\'](?P<url>http://m\.mlb\.com/video/[^"\']+)',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'MLB')
-
- mobj = re.search(
- r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
- webpage)
- if mobj is not None:
- return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
-
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
- webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Livestream')
-
- # Look for Zapiks embed
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Zapiks')
-
- # Look for Kaltura embeds
- kaltura_url = KalturaIE._extract_url(webpage)
- if kaltura_url:
- return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
-
- # Look for EaglePlatform embeds
- eagleplatform_url = EaglePlatformIE._extract_url(webpage)
- if eagleplatform_url:
- return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
-
- # Look for ClipYou (uses EaglePlatform) embeds
- mobj = re.search(
- r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
- if mobj is not None:
- return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
-
- # Look for Pladform embeds
- pladform_url = PladformIE._extract_url(webpage)
- if pladform_url:
- return self.url_result(pladform_url)
-
- # Look for Videomore embeds
- videomore_url = VideomoreIE._extract_url(webpage)
- if videomore_url:
- return self.url_result(videomore_url)
-
- # Look for Webcaster embeds
- webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
- if webcaster_url:
- return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
-
- # Look for Playwire embeds
- mobj = re.search(
- r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for 5min embeds
- mobj = re.search(
- r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
- if mobj is not None:
- return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
-
- # Look for Crooks and Liars embeds
- mobj = re.search(
- r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'))
-
- # Look for NBC Sports VPlayer embeds
- nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
- if nbc_sports_url:
- return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
-
- # Look for NBC News embeds
- nbc_news_embed_url = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
- if nbc_news_embed_url:
- return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
-
- # Look for Google Drive embeds
- google_drive_url = GoogleDriveIE._extract_url(webpage)
- if google_drive_url:
- return self.url_result(google_drive_url, 'GoogleDrive')
-
- # Look for UDN embeds
- mobj = re.search(
- r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
- if mobj is not None:
- return self.url_result(
- compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
-
- # Look for Senate ISVP iframe
- senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
- if senate_isvp_url:
- return self.url_result(senate_isvp_url, 'SenateISVP')
-
- # Look for OnionStudios embeds
- onionstudios_url = OnionStudiosIE._extract_url(webpage)
- if onionstudios_url:
- return self.url_result(onionstudios_url)
-
- # Look for ViewLift embeds
- viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
- if viewlift_url:
- return self.url_result(viewlift_url)
-
- # Look for JWPlatform embeds
- jwplatform_urls = JWPlatformIE._extract_urls(webpage)
- if jwplatform_urls:
- return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
-
- # Look for Digiteka embeds
- digiteka_url = DigitekaIE._extract_url(webpage)
- if digiteka_url:
- return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
-
- # Look for Arkena embeds
- arkena_url = ArkenaIE._extract_url(webpage)
- if arkena_url:
- return self.url_result(arkena_url, ArkenaIE.ie_key())
-
- # Look for Piksel embeds
- piksel_url = PikselIE._extract_url(webpage)
- if piksel_url:
- return self.url_result(piksel_url, PikselIE.ie_key())
-
- # Look for Limelight embeds
- limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
- if limelight_urls:
- return self.playlist_result(
- limelight_urls, video_id, video_title, video_description)
-
- # Look for Anvato embeds
- anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
- if anvato_urls:
- return self.playlist_result(
- anvato_urls, video_id, video_title, video_description)
-
- # Look for AdobeTVVideo embeds
- mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))),
- 'AdobeTVVideo')
-
- # Look for Vine embeds
- mobj = re.search(
- r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
-
- # Look for VODPlatform embeds
- mobj = re.search(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
- webpage)
- if mobj is not None:
- return self.url_result(
- self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
-
- # Look for Mangomolo embeds
- mobj = re.search(
- r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
- (?:
- video\?.*?\bid=(?P<video_id>\d+)|
- index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
- ).+?)\1''', webpage)
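- # matches both Mangomolo embed flavours, e.g. (illustrative URLs):
- #   //admin.mangomolo.com/analytics/index.php/customers/embed/video?id=123&autoplay=1
- #   //admin.mangomolo.com/analytics/index.php/customers/embed/index?channelid=QkFTRTY0&autoplay=1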
- if mobj is not None:
- info = {
- '_type': 'url_transparent',
- 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'uploader': video_uploader,
- }
- video_id = mobj.group('video_id')
- if video_id:
- info.update({
- 'ie_key': 'MangomoloVideo',
- 'id': video_id,
- })
- else:
- info.update({
- 'ie_key': 'MangomoloLive',
- 'id': mobj.group('channel_id'),
- })
- return info
-
- # Look for Instagram embeds
- instagram_embed_url = InstagramIE._extract_embed_url(webpage)
- if instagram_embed_url is not None:
- return self.url_result(
- self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
-
- # Look for LiveLeak embeds
- liveleak_urls = LiveLeakIE._extract_urls(webpage)
- if liveleak_urls:
- return self.playlist_from_matches(liveleak_urls, video_id, video_title)
-
- # Look for 3Q SDN embeds
- threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
- if threeqsdn_url:
- return {
- '_type': 'url_transparent',
- 'ie_key': ThreeQSDNIE.ie_key(),
- 'url': self._proto_relative_url(threeqsdn_url),
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': video_thumbnail,
- 'uploader': video_uploader,
- }
-
- # Look for VBOX7 embeds
- vbox7_url = Vbox7IE._extract_url(webpage)
- if vbox7_url:
- return self.url_result(vbox7_url, Vbox7IE.ie_key())
-
- # Look for DBTV embeds
- dbtv_urls = DBTVIE._extract_urls(webpage)
- if dbtv_urls:
- return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
-
- # Look for Videa embeds
- videa_urls = VideaIE._extract_urls(webpage)
- if videa_urls:
- return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
-
- # Look for 20 minuten embeds
- twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
- if twentymin_urls:
- return self.playlist_from_matches(
- twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
-
- # Look for Openload embeds
- openload_urls = OpenloadIE._extract_urls(webpage)
- if openload_urls:
- return self.playlist_from_matches(
- openload_urls, video_id, video_title, ie=OpenloadIE.ie_key())
-
- # Look for Verystream embeds
- verystream_urls = VerystreamIE._extract_urls(webpage)
- if verystream_urls:
- return self.playlist_from_matches(
- verystream_urls, video_id, video_title, ie=VerystreamIE.ie_key())
-
- # Look for VideoPress embeds
- videopress_urls = VideoPressIE._extract_urls(webpage)
- if videopress_urls:
- return self.playlist_from_matches(
- videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
-
- # Look for Rutube embeds
- rutube_urls = RutubeIE._extract_urls(webpage)
- if rutube_urls:
- return self.playlist_from_matches(
- rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
-
- # Look for WashingtonPost embeds
- wapo_urls = WashingtonPostIE._extract_urls(webpage)
- if wapo_urls:
- return self.playlist_from_matches(
- wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
-
- # Look for Mediaset embeds
- mediaset_urls = MediasetIE._extract_urls(self, webpage)
- if mediaset_urls:
- return self.playlist_from_matches(
- mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
-
- # Look for JOJ.sk embeds
- joj_urls = JojIE._extract_urls(webpage)
- if joj_urls:
- return self.playlist_from_matches(
- joj_urls, video_id, video_title, ie=JojIE.ie_key())
-
- # Look for megaphone.fm embeds
- mpfn_urls = MegaphoneIE._extract_urls(webpage)
- if mpfn_urls:
- return self.playlist_from_matches(
- mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
-
- # Look for vzaar embeds
- vzaar_urls = VzaarIE._extract_urls(webpage)
- if vzaar_urls:
- return self.playlist_from_matches(
- vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
-
- channel9_urls = Channel9IE._extract_urls(webpage)
- if channel9_urls:
- return self.playlist_from_matches(
- channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
-
- vshare_urls = VShareIE._extract_urls(webpage)
- if vshare_urls:
- return self.playlist_from_matches(
- vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
-
- # Look for Mediasite embeds
- mediasite_urls = MediasiteIE._extract_urls(webpage)
- if mediasite_urls:
- entries = [
- self.url_result(smuggle_url(
- compat_urlparse.urljoin(url, mediasite_url),
- {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
- for mediasite_url in mediasite_urls]
- return self.playlist_result(entries, video_id, video_title)
-
- springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
- if springboardplatform_urls:
- return self.playlist_from_matches(
- springboardplatform_urls, video_id, video_title,
- ie=SpringboardPlatformIE.ie_key())
-
- yapfiles_urls = YapFilesIE._extract_urls(webpage)
- if yapfiles_urls:
- return self.playlist_from_matches(
- yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
-
- vice_urls = ViceIE._extract_urls(webpage)
- if vice_urls:
- return self.playlist_from_matches(
- vice_urls, video_id, video_title, ie=ViceIE.ie_key())
-
- xfileshare_urls = XFileShareIE._extract_urls(webpage)
- if xfileshare_urls:
- return self.playlist_from_matches(
- xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
-
- cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
- if cloudflarestream_urls:
- return self.playlist_from_matches(
- cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
-
- peertube_urls = PeerTubeIE._extract_urls(webpage, url)
- if peertube_urls:
- return self.playlist_from_matches(
- peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
-
- teachable_url = TeachableIE._extract_url(webpage, url)
- if teachable_url:
- return self.url_result(teachable_url)
-
- indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
- if indavideo_urls:
- return self.playlist_from_matches(
- indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
-
- apa_urls = APAIE._extract_urls(webpage)
- if apa_urls:
- return self.playlist_from_matches(
- apa_urls, video_id, video_title, ie=APAIE.ie_key())
-
- foxnews_urls = FoxNewsIE._extract_urls(webpage)
- if foxnews_urls:
- return self.playlist_from_matches(
- foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
-
- sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
- r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
- webpage)]
- if sharevideos_urls:
- return self.playlist_from_matches(
- sharevideos_urls, video_id, video_title)
-
- viqeo_urls = ViqeoIE._extract_urls(webpage)
- if viqeo_urls:
- return self.playlist_from_matches(
- viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
-
- expressen_urls = ExpressenIE._extract_urls(webpage)
- if expressen_urls:
- return self.playlist_from_matches(
- expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
-
- zype_urls = ZypeIE._extract_urls(webpage)
- if zype_urls:
- return self.playlist_from_matches(
- zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
-
- # Look for HTML5 media
- entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
- if entries:
- if len(entries) == 1:
- entries[0].update({
- 'id': video_id,
- 'title': video_title,
- })
- else:
- for num, entry in enumerate(entries, start=1):
- entry.update({
- 'id': '%s-%s' % (video_id, num),
- 'title': '%s (%d)' % (video_title, num),
- })
- for entry in entries:
- self._sort_formats(entry['formats'])
- return self.playlist_result(entries, video_id, video_title)
-
- jwplayer_data = self._find_jwplayer_data(
- webpage, video_id, transform_source=js_to_json)
- if jwplayer_data:
- try:
- info = self._parse_jwplayer_data(
- jwplayer_data, video_id, require_title=False, base_url=url)
- return merge_dicts(info, info_dict)
- except ExtractorError:
- # See https://github.com/ytdl-org/youtube-dl/pull/16735
- pass
-
- # Video.js embed
- mobj = re.search(
- r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
- webpage)
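- # e.g. matches a player setup like (hypothetical markup):
- #   videojs('player').src([{src: 'https://example.com/master.m3u8', type: 'application/x-mpegURL'}]);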
- if mobj is not None:
- sources = self._parse_json(
- mobj.group(1), video_id, transform_source=js_to_json,
- fatal=False) or []
- if not isinstance(sources, list):
- sources = [sources]
- formats = []
- for source in sources:
- src = source.get('src')
- if not src or not isinstance(src, compat_str):
- continue
- src = compat_urlparse.urljoin(url, src)
- src_type = source.get('type')
- if isinstance(src_type, compat_str):
- src_type = src_type.lower()
- ext = determine_ext(src).lower()
- if src_type == 'video/youtube':
- return self.url_result(src, YoutubeIE.ie_key())
- if src_type == 'application/dash+xml' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- src, video_id, mpd_id='dash', fatal=False))
- elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- src, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- else:
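- # precedence note: this evaluates as (mimetype2ext(src_type) or ext)
- # if ext in KNOWN_EXTENSIONS else 'mp4'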
- formats.append({
- 'url': src,
- 'ext': (mimetype2ext(src_type)
- or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
- })
- if formats:
- self._sort_formats(formats)
- info_dict['formats'] = formats
- return info_dict
-
- # Looking for http://schema.org/VideoObject
- json_ld = self._search_json_ld(
- webpage, video_id, default={}, expected_type='VideoObject')
- if json_ld.get('url'):
- return merge_dicts(json_ld, info_dict)
-
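- # Heuristic filter for candidate media URLs: accept YouTube and RTMP URLs
- # outright, otherwise require a path whose extension is not a known
- # non-media type (images, subtitles, scripts, manifests)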
- def check_video(vurl):
- if YoutubeIE.suitable(vurl):
- return True
- if RtmpIE.suitable(vurl):
- return True
- vpath = compat_urlparse.urlparse(vurl).path
- vext = determine_ext(vpath)
- return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
-
- def filter_video(urls):
- return list(filter(check_video, urls))
-
- # Start with something easy: JW Player in SWFObject
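- # e.g. flashvars: 'autostart=false&file=http://example.com/video.mp4' (hypothetical)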
- found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
- if not found:
- # Look for gorilla-vid style embedding
- found = filter_video(re.findall(r'''(?sx)
- (?:
- jw_plugins|
- JWPlayerOptions|
- jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
- )
- .*?
- ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
- if not found:
- # Broaden the search a little bit
- found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
- if not found:
- # Broaden the findall a little bit: JWPlayer JS loader
- found = filter_video(re.findall(
- r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
- if not found:
- # Flow player
- found = filter_video(re.findall(r'''(?xs)
- flowplayer\("[^"]+",\s*
- \{[^}]+?\}\s*,
- \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
- ["']?url["']?\s*:\s*["']([^"']+)["']
- ''', webpage))
- if not found:
- # Cinerama player
- found = re.findall(
- r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
- if not found:
- # Try to find twitter cards info
- # twitter:player:stream should be checked before twitter:player since
- # it is expected to contain a raw stream (see
- # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
- found = filter_video(re.findall(
- r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
- if not found:
- # We look for Open Graph info:
- # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
- m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
- # We only look in og:video if the MIME type is a video; don't try if it's a Flash player:
- if m_video_type:
- found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
- if not found:
- REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
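- # matches the URL in e.g. <meta http-equiv="refresh" content="0; URL='http://example.com/video'"> (hypothetical)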
- found = re.search(
- r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
- r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
- webpage)
- if not found:
- # Look also in Refresh HTTP header
- refresh_header = head_response.headers.get('Refresh')
- if refresh_header:
- # In Python 2, response HTTP headers are bytestrings
- if sys.version_info < (3, 0) and isinstance(refresh_header, str):
- refresh_header = refresh_header.decode('iso-8859-1')
- found = re.search(REDIRECT_REGEX, refresh_header)
- if found:
- new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
- if new_url != url:
- self.report_following_redirect(new_url)
- return {
- '_type': 'url',
- 'url': new_url,
- }
- else:
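- # the refresh points back at the same URL, ignore it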
- found = None
-
- if not found:
- # twitter:player is an https URL to an iframe player that may or may not
- # be supported by youtube-dl, so it is checked last (see
- # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
- embed_url = self._html_search_meta('twitter:player', webpage, default=None)
- if embed_url and embed_url != url:
- return self.url_result(embed_url)
-
- if not found:
- raise UnsupportedError(url)
-
- entries = []
- for video_url in orderedSet(found):
- video_url = unescapeHTML(video_url)
- video_url = video_url.replace('\\/', '/')
- video_url = compat_urlparse.urljoin(url, video_url)
- video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
-
- # Sometimes, jwplayer extraction will result in a YouTube URL
- if YoutubeIE.suitable(video_url):
- entries.append(self.url_result(video_url, 'Youtube'))
- continue
-
- # here's a fun little line of code for you:
- video_id = os.path.splitext(video_id)[0]
-
- entry_info_dict = {
- 'id': video_id,
- 'uploader': video_uploader,
- 'title': video_title,
- 'age_limit': age_limit,
- }
-
- if RtmpIE.suitable(video_url):
- entry_info_dict.update({
- '_type': 'url_transparent',
- 'ie_key': RtmpIE.ie_key(),
- 'url': video_url,
- })
- entries.append(entry_info_dict)
- continue
-
- ext = determine_ext(video_url)
- if ext == 'smil':
- entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
- elif ext == 'xspf':
- return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
- elif ext == 'm3u8':
- entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
- elif ext == 'mpd':
- entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
- elif ext == 'f4m':
- entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
- elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
- # Just matching .ism/manifest is not enough to be reliably sure
- # whether it's actually an ISM manifest or some other streaming
- # manifest since there are various streaming URL formats
- # possible (see [1]) as well as some other shenanigans like
- # .smil/manifest URLs that actually serve an ISM (see [2]) and
- # so on.
- # Thus the most reasonable way to solve this is to delegate
- # to the generic extractor in order to look into the contents
- # of the manifest itself.
- # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
- # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
- entry_info_dict = self.url_result(
- smuggle_url(video_url, {'to_generic': True}),
- GenericIE.ie_key())
- else:
- entry_info_dict['url'] = video_url
-
- if entry_info_dict.get('formats'):
- self._sort_formats(entry_info_dict['formats'])
-
- entries.append(entry_info_dict)
-
- if len(entries) == 1:
- return entries[0]
- else:
- for num, e in enumerate(entries, start=1):
- # 'url' results don't have a title
- if e.get('title') is not None:
- e['title'] = '%s (%d)' % (e['title'], num)
- return {
- '_type': 'playlist',
- 'entries': entries,
- }
diff --git a/youtube_dl/extractor/giantbomb.py b/youtube_dl/extractor/giantbomb.py
deleted file mode 100644
index 6a1b1e96e..000000000
--- a/youtube_dl/extractor/giantbomb.py
+++ /dev/null
@@ -1,87 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import json
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- qualities,
- unescapeHTML,
-)
-
-
-class GiantBombIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/videos/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
- _TEST = {
- 'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
- 'md5': 'c8ea694254a59246a42831155dec57ac',
- 'info_dict': {
- 'id': '2300-9782',
- 'display_id': 'quick-look-destiny-the-dark-below',
- 'ext': 'mp4',
- 'title': 'Quick Look: Destiny: The Dark Below',
- 'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24',
- 'duration': 2399,
- 'thumbnail': r're:^https?://.*\.jpg$',
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id)
-
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- video = json.loads(unescapeHTML(self._search_regex(
- r'data-video="([^"]+)"', webpage, 'data-video')))
-
- duration = int_or_none(video.get('lengthSeconds'))
-
- quality = qualities([
- 'f4m_low', 'progressive_low', 'f4m_high',
- 'progressive_high', 'f4m_hd', 'progressive_hd'])
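- # ranks format_ids from worst (f4m_low) to best (progressive_hd)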
-
- formats = []
- for format_id, video_url in video['videoStreams'].items():
- if format_id == 'f4m_stream':
- continue
- ext = determine_ext(video_url)
- if ext == 'f4m':
- f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
- if f4m_formats:
- f4m_formats[0]['quality'] = quality(format_id)
- formats.extend(f4m_formats)
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, display_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- 'quality': quality(format_id),
- })
-
- if not formats:
- youtube_id = video.get('youtubeID')
- if youtube_id:
- return self.url_result(youtube_id, 'Youtube')
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py
deleted file mode 100644
index 9ad1d95fb..000000000
--- a/youtube_dl/extractor/globo.py
+++ /dev/null
@@ -1,234 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import base64
-import hashlib
-import json
-import random
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
-)
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- orderedSet,
- str_or_none,
-)
-
-
-class GloboIE(InfoExtractor):
- _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
- _NETRC_MACHINE = 'globo'
- _TESTS = [{
- 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
- 'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
- 'info_dict': {
- 'id': '3607726',
- 'ext': 'mp4',
- 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
- 'duration': 103.204,
- 'uploader': 'Globo.com',
- 'uploader_id': '265',
- },
- }, {
- 'url': 'http://globoplay.globo.com/v/4581987/',
- 'md5': 'f36a1ecd6a50da1577eee6dd17f67eff',
- 'info_dict': {
- 'id': '4581987',
- 'ext': 'mp4',
- 'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
- 'duration': 137.973,
- 'uploader': 'Rede Globo',
- 'uploader_id': '196',
- },
- }, {
- 'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
- 'only_matching': True,
- }, {
- 'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
- 'only_matching': True,
- }, {
- 'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
- 'only_matching': True,
- }, {
- 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
- 'only_matching': True,
- }, {
- 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
- 'only_matching': True,
- }, {
- 'url': 'globo:3607726',
- 'only_matching': True,
- }]
-
- def _real_initialize(self):
- email, password = self._get_login_info()
- if email is None:
- return
-
- try:
- glb_id = (self._download_json(
- 'https://login.globo.com/api/authentication', None, data=json.dumps({
- 'payload': {
- 'email': email,
- 'password': password,
- 'serviceId': 4654,
- },
- }).encode(), headers={
- 'Content-Type': 'application/json; charset=utf-8',
- }) or {}).get('glbId')
- if glb_id:
- self._set_cookie('.globo.com', 'GLBID', glb_id)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
- resp = self._parse_json(e.cause.read(), None)
- raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
- raise
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video = self._download_json(
- 'http://api.globovideos.com/videos/%s/playlist' % video_id,
- video_id)['videos'][0]
- if video.get('encrypted') is True:
- raise ExtractorError('This video is DRM protected.', expected=True)
-
- title = video['title']
-
- formats = []
- subtitles = {}
- for resource in video['resources']:
- resource_id = resource.get('_id')
- resource_url = resource.get('url')
- resource_type = resource.get('type')
- if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
- continue
-
- if resource_type == 'subtitle':
- subtitles.setdefault(resource.get('language') or 'por', []).append({
- 'url': resource_url,
- })
- continue
-
- security = self._download_json(
- 'http://security.video.globo.com/videos/%s/hash' % video_id,
- video_id, 'Downloading security hash for %s' % resource_id, query={
- 'player': 'desktop',
- 'version': '5.19.1',
- 'resource_id': resource_id,
- })
-
- security_hash = security.get('hash')
- if not security_hash:
- message = security.get('message')
- if message:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, message), expected=True)
- continue
-
- assert security_hash[:2] in ('04', '14')
- received_time = security_hash[3:13]
- received_md5 = security_hash[24:]
-
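- # Apparent layout of the security hash (inferred from the slicing above):
- # [0:2] version ('04' or '14'), [3:13] epoch timestamp, [24:] MD5 digest;
- # the signed hash keeps the first 23 characters and appends a fresh
- # timestamp, random padding and our own MD5-based signature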
- sign_time = compat_str(int(received_time) + 86400)
- padding = '%010d' % random.randint(1, 10000000000)
-
- md5_data = (received_md5 + sign_time + padding + '0xAC10FD').encode()
- signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
- signed_hash = security_hash[:23] + sign_time + padding + signed_md5
-
- signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
- if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(
- signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif resource_id.endswith('mpd') or resource_url.endswith('.mpd'):
- formats.extend(self._extract_mpd_formats(
- signed_url, resource_id, mpd_id='dash', fatal=False))
- elif resource_id.endswith('manifest') or resource_url.endswith('/manifest'):
- formats.extend(self._extract_ism_formats(
- signed_url, resource_id, ism_id='mss', fatal=False))
- else:
- formats.append({
- 'url': signed_url,
- 'format_id': 'http-%s' % resource_id,
- 'height': int_or_none(resource.get('height')),
- })
-
- self._sort_formats(formats)
-
- duration = float_or_none(video.get('duration'), 1000)
- uploader = video.get('channel')
- uploader_id = str_or_none(video.get('channel_id'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'duration': duration,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class GloboArticleIE(InfoExtractor):
- _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
-
- _VIDEOID_REGEXES = [
- r'\bdata-video-id=["\'](\d{7,})',
- r'\bdata-player-videosids=["\'](\d{7,})',
- r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
- r'\bdata-id=["\'](\d{7,})',
- r'<div[^>]+\bid=["\'](\d{7,})',
- ]
-
- _TESTS = [{
- 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
- 'info_dict': {
- 'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
- 'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
- 'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
- },
- 'playlist_count': 1,
- }, {
- 'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
- 'info_dict': {
- 'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
- 'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
- 'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
- },
- 'playlist_count': 6,
- }, {
- 'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
- 'only_matching': True,
- }, {
- 'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
- 'only_matching': True,
- }, {
- 'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url)
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_ids = []
- for video_regex in self._VIDEOID_REGEXES:
- video_ids.extend(re.findall(video_regex, webpage))
- entries = [
- self.url_result('globo:%s' % video_id, GloboIE.ie_key())
- for video_id in orderedSet(video_ids)]
- title = self._og_search_title(webpage, fatal=False)
- description = self._html_search_meta('description', webpage)
- return self.playlist_result(entries, display_id, title, description)
diff --git a/youtube_dl/extractor/go.py b/youtube_dl/extractor/go.py
deleted file mode 100644
index 03e48f4ea..000000000
--- a/youtube_dl/extractor/go.py
+++ /dev/null
@@ -1,227 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .adobepass import AdobePassIE
-from ..utils import (
- int_or_none,
- determine_ext,
- parse_age_limit,
- urlencode_postdata,
- ExtractorError,
-)
-
-
-class GoIE(AdobePassIE):
- _SITE_INFO = {
- 'abc': {
- 'brand': '001',
- 'requestor_id': 'ABC',
- },
- 'freeform': {
- 'brand': '002',
- 'requestor_id': 'ABCFamily',
- },
- 'watchdisneychannel': {
- 'brand': '004',
- 'resource_id': 'Disney',
- },
- 'watchdisneyjunior': {
- 'brand': '008',
- 'resource_id': 'DisneyJunior',
- },
- 'watchdisneyxd': {
- 'brand': '009',
- 'resource_id': 'DisneyXD',
- },
- 'disneynow': {
- 'brand': '011',
- 'resource_id': 'Disney',
- }
- }
- _VALID_URL = r'https?://(?:(?:(?P<sub_domain>%s)\.)?go|(?P<sub_domain_2>disneynow))\.com/(?:(?:[^/]+/)*(?P<id>vdka\w+)|(?:[^/]+/)*(?P<display_id>[^/?#]+))'\
- % '|'.join(list(_SITE_INFO.keys()) + ['disneynow'])
- _TESTS = [{
- 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
- 'info_dict': {
- 'id': 'VDKA3807643',
- 'ext': 'mp4',
- 'title': 'The Traitor in the White House',
- 'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://watchdisneyxd.go.com/doraemon',
- 'info_dict': {
- 'title': 'Doraemon',
- 'id': 'SH55574025',
- },
- 'playlist_mincount': 51,
- }, {
- 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
- 'only_matching': True,
- }, {
- 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
- 'only_matching': True,
- }, {
- # brand 004
- 'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
- 'only_matching': True,
- }, {
- # brand 008
- 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
- 'only_matching': True,
- }, {
- 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
- 'only_matching': True,
- }]
-
- def _extract_videos(self, brand, video_id='-1', show_id='-1'):
- display_id = video_id if video_id != '-1' else show_id
- return self._download_json(
- 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
- display_id)['video']
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
- video_id, display_id = mobj.group('id', 'display_id')
- site_info = self._SITE_INFO.get(sub_domain, {})
- brand = site_info.get('brand')
- if not video_id or not site_info:
- webpage = self._download_webpage(url, display_id or video_id)
- video_id = self._search_regex(
- # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
- # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
- r'data-video-id=["\']*(VDKA\w+)', webpage, 'video id',
- default=video_id)
- if not site_info:
- brand = self._search_regex(
- (r'data-brand=\s*["\']\s*(\d+)',
- r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
- default='004')
- site_info = next(
- si for _, si in self._SITE_INFO.items()
- if si.get('brand') == brand)
- if not video_id:
- # show extraction works for Disney, DisneyJunior and DisneyXD;
- # ABC and Freeform have a different layout
- show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
- videos = self._extract_videos(brand, show_id=show_id)
- show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
- entries = []
- for video in videos:
- entries.append(self.url_result(
- video['url'], 'Go', video.get('id'), video.get('title')))
- entries.reverse()
- return self.playlist_result(entries, show_id, show_title)
- video_data = self._extract_videos(brand, video_id)[0]
- video_id = video_data['id']
- title = video_data['title']
-
- formats = []
- for asset in video_data.get('assets', {}).get('asset', []):
- asset_url = asset.get('value')
- if not asset_url:
- continue
- format_id = asset.get('format')
- ext = determine_ext(asset_url)
- if ext == 'm3u8':
- video_type = video_data.get('type')
- data = {
- 'video_id': video_data['id'],
- 'video_type': video_type,
- 'brand': brand,
- 'device': '001',
- }
- if video_data.get('accesslevel') == '1':
- requestor_id = site_info.get('requestor_id', 'DisneyChannels')
- resource = site_info.get('resource_id') or self._get_mvpd_resource(
- requestor_id, title, video_id, None)
- auth = self._extract_mvpd_auth(
- url, video_id, requestor_id, resource)
- data.update({
- 'token': auth,
- 'token_type': 'ap',
- 'adobe_requestor_id': requestor_id,
- })
- else:
- self._initialize_geo_bypass({'countries': ['US']})
- entitlement = self._download_json(
- 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
- video_id, data=urlencode_postdata(data))
- errors = entitlement.get('errors', {}).get('errors', [])
- if errors:
- for error in errors:
- if error.get('code') == 1002:
- self.raise_geo_restricted(
- error['message'], countries=['US'])
- error_message = ', '.join([error['message'] for error in errors])
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
- asset_url += '?' + entitlement['uplynkData']['sessionKey']
- formats.extend(self._extract_m3u8_formats(
- asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
- else:
- f = {
- 'format_id': format_id,
- 'url': asset_url,
- 'ext': ext,
- }
- if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
- f.update({
- 'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
- 'preference': 1,
- })
- else:
- mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
- if mobj:
- height = int(mobj.group(2))
- f.update({
- 'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height,
- 'width': int(mobj.group(1)),
- 'height': height,
- })
- formats.append(f)
- self._sort_formats(formats)
-
- subtitles = {}
- for cc in video_data.get('closedcaption', {}).get('src', []):
- cc_url = cc.get('value')
- if not cc_url:
- continue
- ext = determine_ext(cc_url)
- if ext == 'xml':
- ext = 'ttml'
- subtitles.setdefault(cc.get('lang'), []).append({
- 'url': cc_url,
- 'ext': ext,
- })
-
- thumbnails = []
- for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []):
- thumbnail_url = thumbnail.get('value')
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'width': int_or_none(thumbnail.get('width')),
- 'height': int_or_none(thumbnail.get('height')),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video_data.get('longdescription') or video_data.get('description'),
- 'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000),
- 'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')),
- 'episode_number': int_or_none(video_data.get('episodenumber')),
- 'series': video_data.get('show', {}).get('title'),
- 'season_number': int_or_none(video_data.get('season', {}).get('num')),
- 'thumbnails': thumbnails,
- 'formats': formats,
- 'subtitles': subtitles,
- }
diff --git a/youtube_dl/extractor/go90.py b/youtube_dl/extractor/go90.py
deleted file mode 100644
index c3ea717bc..000000000
--- a/youtube_dl/extractor/go90.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_HTTPError
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- parse_age_limit,
- parse_iso8601,
-)
-
-
-class Go90IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?go90\.com/(?:videos|embed)/(?P<id>[0-9a-zA-Z]+)'
- _TESTS = [{
- 'url': 'https://www.go90.com/videos/84BUqjLpf9D',
- 'md5': 'efa7670dbbbf21a7b07b360652b24a32',
- 'info_dict': {
- 'id': '84BUqjLpf9D',
- 'ext': 'mp4',
- 'title': 'Daily VICE - Inside The Utah Coalition Against Pornography Convention',
- 'description': 'VICE\'s Karley Sciortino meets with activists who discuss the state\'s strong anti-porn stance. Then, VICE Sports explains NFL contracts.',
- 'timestamp': 1491868800,
- 'upload_date': '20170411',
- 'age_limit': 14,
- }
- }, {
- 'url': 'https://www.go90.com/embed/261MflWkD3N',
- 'only_matching': True,
- }]
- _GEO_BYPASS = False
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- try:
- headers = self.geo_verification_headers()
- headers.update({
- 'Content-Type': 'application/json; charset=utf-8',
- })
- video_data = self._download_json(
- 'https://www.go90.com/api/view/items/' + video_id, video_id,
- headers=headers, data=b'{"client":"web","device_type":"pc"}')
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
- message = self._parse_json(e.cause.read().decode(), None)['error']['message']
- if 'region unavailable' in message:
- self.raise_geo_restricted(countries=['US'])
- raise ExtractorError(message, expected=True)
- raise
-
- if video_data.get('requires_drm'):
- raise ExtractorError('This video is DRM protected.', expected=True)
- main_video_asset = video_data['main_video_asset']
-
- episode_number = int_or_none(video_data.get('episode_number'))
- series = None
- season = None
- season_id = None
- season_number = None
- for metadata in video_data.get('__children', {}).get('Item', {}).values():
- if metadata.get('type') == 'show':
- series = metadata.get('title')
- elif metadata.get('type') == 'season':
- season = metadata.get('title')
- season_id = metadata.get('id')
- season_number = int_or_none(metadata.get('season_number'))
-
- title = episode = video_data.get('title') or series
- if series and series != title:
- title = '%s - %s' % (series, title)
-
- thumbnails = []
- formats = []
- subtitles = {}
- for asset in video_data.get('assets'):
- if asset.get('id') == main_video_asset:
- for source in asset.get('sources', []):
- source_location = source.get('location')
- if not source_location:
- continue
- source_type = source.get('type')
- if source_type == 'hls':
- m3u8_formats = self._extract_m3u8_formats(
- source_location, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False)
- for f in m3u8_formats:
- mobj = re.search(r'/hls-(\d+)-(\d+)K', f['url'])
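- # e.g. a URL containing '/hls-720-3000K' yields height=720 and
- # tbr=3000 (illustrative values); width is derived assuming 16:9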
- if mobj:
- height, tbr = mobj.groups()
- height = int_or_none(height)
- f.update({
- 'height': f.get('height') or height,
- 'width': f.get('width') or int_or_none(height / 9.0 * 16.0 if height else None),
- 'tbr': f.get('tbr') or int_or_none(tbr),
- })
- formats.extend(m3u8_formats)
- elif source_type == 'dash':
- formats.extend(self._extract_mpd_formats(
- source_location, video_id, mpd_id='dash', fatal=False))
- else:
- formats.append({
- 'format_id': source.get('name'),
- 'url': source_location,
- 'width': int_or_none(source.get('width')),
- 'height': int_or_none(source.get('height')),
- 'tbr': int_or_none(source.get('bitrate')),
- })
-
- for caption in asset.get('caption_metadata', []):
- caption_url = caption.get('source_url')
- if not caption_url:
- continue
- subtitles.setdefault(caption.get('language', 'en'), []).append({
- 'url': caption_url,
- 'ext': determine_ext(caption_url, 'vtt'),
- })
- elif asset.get('type') == 'image':
- asset_location = asset.get('location')
- if not asset_location:
- continue
- thumbnails.append({
- 'url': asset_location,
- 'width': int_or_none(asset.get('width')),
- 'height': int_or_none(asset.get('height')),
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnails': thumbnails,
- 'description': video_data.get('short_description'),
- 'like_count': int_or_none(video_data.get('like_count')),
- 'timestamp': parse_iso8601(video_data.get('released_at')),
- 'series': series,
- 'episode': episode,
- 'season': season,
- 'season_id': season_id,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'subtitles': subtitles,
- 'age_limit': parse_age_limit(video_data.get('rating')),
- }
diff --git a/youtube_dl/extractor/hark.py b/youtube_dl/extractor/hark.py
deleted file mode 100644
index 342a6130e..000000000
--- a/youtube_dl/extractor/hark.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class HarkIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hark\.com/clips/(?P<id>.+?)-.+'
- _TEST = {
- 'url': 'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
- 'md5': '6783a58491b47b92c7c1af5a77d4cbee',
- 'info_dict': {
- 'id': 'mmbzyhkgny',
- 'ext': 'mp3',
- 'title': 'Obama: \'Beyond The Afghan Theater, We Only Target Al Qaeda\' on May 23, 2013',
- 'description': 'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
- 'duration': 11,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- data = self._download_json(
- 'http://www.hark.com/clips/%s.json' % video_id, video_id)
-
- return {
- 'id': video_id,
- 'url': data['url'],
- 'title': data['name'],
- 'description': data.get('description'),
- 'thumbnail': data.get('image_original'),
- 'duration': data.get('duration'),
- }
diff --git a/youtube_dl/extractor/hellporno.py b/youtube_dl/extractor/hellporno.py
deleted file mode 100644
index 0ee8ea712..000000000
--- a/youtube_dl/extractor/hellporno.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- js_to_json,
- remove_end,
- determine_ext,
-)
-
-
-class HellPornoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
- _TESTS = [{
- 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
- 'md5': '1fee339c610d2049699ef2aa699439f1',
- 'info_dict': {
- 'id': '149116',
- 'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
- 'ext': 'mp4',
- 'title': 'Dixie is posing with naked ass very erotic',
- 'thumbnail': r're:https?://.*\.jpg$',
- 'age_limit': 18,
- }
- }, {
- 'url': 'http://hellporno.net/v/186271/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- title = remove_end(self._html_search_regex(
- r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
-
- flashvars = self._parse_json(self._search_regex(
- r'var\s+flashvars\s*=\s*({.+?});', webpage, 'flashvars'),
- display_id, transform_source=js_to_json)
-
- video_id = flashvars.get('video_id')
- thumbnail = flashvars.get('preview_url')
- ext = determine_ext(flashvars.get('postfix'), 'mp4')
-
- formats = []
- for video_url_key in ['video_url', 'video_alt_url']:
- video_url = flashvars.get(video_url_key)
- if not video_url:
- continue
- video_text = flashvars.get('%s_text' % video_url_key)
- fmt = {
- 'url': video_url,
- 'ext': ext,
- 'format_id': video_text,
- }
- m = re.search(r'^(?P<height>\d+)[pP]', video_text)
- if m:
- fmt['height'] = int(m.group('height'))
- formats.append(fmt)
- self._sort_formats(formats)
-
- categories = self._html_search_meta(
- 'keywords', webpage, 'categories', default='').split(',')
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'categories': categories,
- 'age_limit': 18,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/hotstar.py b/youtube_dl/extractor/hotstar.py
deleted file mode 100644
index f9f7c5a64..000000000
--- a/youtube_dl/extractor/hotstar.py
+++ /dev/null
@@ -1,205 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import hashlib
-import hmac
-import re
-import time
-import uuid
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- str_or_none,
- try_get,
- url_or_none,
-)
-
-
-class HotStarBaseIE(InfoExtractor):
- _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
-
- def _call_api_impl(self, path, video_id, query):
- st = int(time.time())
- exp = st + 6000
- auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
- auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
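- # the resulting token looks like (illustrative):
- #   st=1500000000~exp=1500006000~acl=/*~hmac=<sha256 hex digest>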
- response = self._download_json(
- 'https://api.hotstar.com/' + path, video_id, headers={
- 'hotstarauth': auth,
- 'x-country-code': 'IN',
- 'x-platform-code': 'JIO',
- }, query=query)
- if response['statusCode'] != 'OK':
- raise ExtractorError(
- response['body']['message'], expected=True)
- return response['body']['results']
-
- def _call_api(self, path, video_id, query_name='contentId'):
- return self._call_api_impl(path, video_id, {
- query_name: video_id,
- 'tas': 10000,
- })
-
- def _call_api_v2(self, path, video_id):
- return self._call_api_impl(
- '%s/in/contents/%s' % (path, video_id), video_id, {
- 'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash',
- 'client': 'mweb',
- 'clientVersion': '6.18.0',
- 'deviceId': compat_str(uuid.uuid4()),
- 'osName': 'Windows',
- 'osVersion': '10',
- })
-
-
-class HotStarIE(HotStarBaseIE):
- IE_NAME = 'hotstar'
- _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
- _TESTS = [{
- # contentData
- 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
- 'info_dict': {
- 'id': '1000076273',
- 'ext': 'mp4',
- 'title': 'Can You Not Spread Rumours?',
- 'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
- 'timestamp': 1447248600,
- 'upload_date': '20151111',
- 'duration': 381,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }, {
- # contentDetail
- 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
- 'only_matching': True,
- }, {
- 'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
- 'only_matching': True,
- }, {
- 'url': 'http://www.hotstar.com/1000000515',
- 'only_matching': True,
- }, {
- # only available via api v2
- 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
- 'only_matching': True,
- }]
- _GEO_BYPASS = False
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
- app_state = self._parse_json(self._search_regex(
- r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
- webpage, 'app state'), video_id)
- video_data = {}
- getters = list(
- lambda x, k=k: x['initialState']['content%s' % k]['content']
- for k in ('Data', 'Detail')
- )
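- # k=k binds the loop variable at definition time, so each lambda
- # looks up its own key ('contentData' / 'contentDetail')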
- for v in app_state.values():
- content = try_get(v, getters, dict)
- if content and content.get('contentId') == video_id:
- video_data = content
- break
-
- title = video_data['title']
-
- if video_data.get('drmProtected'):
- raise ExtractorError('This video is DRM protected.', expected=True)
-
- formats = []
- geo_restricted = False
- playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
- for playback_set in playback_sets:
- if not isinstance(playback_set, dict):
- continue
- format_url = url_or_none(playback_set.get('playbackUrl'))
- if not format_url:
- continue
- format_url = re.sub(
- r'(?<=//staragvod)(\d)', r'web\1', format_url)
- tags = str_or_none(playback_set.get('tagsCombination')) or ''
- if tags and 'encryption:plain' not in tags:
- continue
- ext = determine_ext(format_url)
- try:
- if 'package:hls' in tags or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls'))
- elif 'package:dash' in tags or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- format_url, video_id, mpd_id='dash'))
- elif ext == 'f4m':
- # f4m manifests produce broken files, so skip them
- pass
- else:
- formats.append({
- 'url': format_url,
- 'width': int_or_none(playback_set.get('width')),
- 'height': int_or_none(playback_set.get('height')),
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- geo_restricted = True
- continue
- if not formats and geo_restricted:
- self.raise_geo_restricted(countries=['IN'])
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video_data.get('description'),
- 'duration': int_or_none(video_data.get('duration')),
- 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
- 'formats': formats,
- 'channel': video_data.get('channelName'),
- 'channel_id': video_data.get('channelId'),
- 'series': video_data.get('showName'),
- 'season': video_data.get('seasonName'),
- 'season_number': int_or_none(video_data.get('seasonNo')),
- 'season_id': video_data.get('seasonId'),
- 'episode': title,
- 'episode_number': int_or_none(video_data.get('episodeNo')),
- }
-
-
-class HotStarPlaylistIE(HotStarBaseIE):
- IE_NAME = 'hotstar:playlist'
- _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
- _TESTS = [{
- 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
- 'info_dict': {
- 'id': '3_2_26',
- },
- 'playlist_mincount': 20,
- }, {
- 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
-
- entries = [
- self.url_result(
- 'https://www.hotstar.com/%s' % video['contentId'],
- ie=HotStarIE.ie_key(), video_id=video['contentId'])
- for video in collection['assets']['items']
- if video.get('contentId')]
-
- return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/extractor/iconosquare.py b/youtube_dl/extractor/iconosquare.py
deleted file mode 100644
index a39f422e9..000000000
--- a/youtube_dl/extractor/iconosquare.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- get_element_by_id,
- remove_end,
-)
-
-
-class IconosquareIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:iconosquare\.com|statigr\.am)/p/(?P<id>[^/]+)'
- _TEST = {
- 'url': 'http://statigr.am/p/522207370455279102_24101272',
- 'md5': '6eb93b882a3ded7c378ee1d6884b1814',
- 'info_dict': {
- 'id': '522207370455279102_24101272',
- 'ext': 'mp4',
- 'title': 'Instagram photo by @aguynamedpatrick (Patrick Janelle)',
- 'description': 'md5:644406a9ec27457ed7aa7a9ebcd4ce3d',
- 'timestamp': 1376471991,
- 'upload_date': '20130814',
- 'uploader': 'aguynamedpatrick',
- 'uploader_id': '24101272',
- 'comment_count': int,
- 'like_count': int,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- media = self._parse_json(
- get_element_by_id('mediaJson', webpage),
- video_id)
-
- formats = [{
- 'url': f['url'],
- 'format_id': format_id,
- 'width': int_or_none(f.get('width')),
- 'height': int_or_none(f.get('height'))
- } for format_id, f in media['videos'].items()]
- self._sort_formats(formats)
-
- title = remove_end(self._og_search_title(webpage), ' - via Iconosquare')
-
- timestamp = int_or_none(media.get('created_time') or media.get('caption', {}).get('created_time'))
- description = media.get('caption', {}).get('text')
-
- uploader = media.get('user', {}).get('username')
- uploader_id = media.get('user', {}).get('id')
-
- comment_count = int_or_none(media.get('comments', {}).get('count'))
- like_count = int_or_none(media.get('likes', {}).get('count'))
-
- thumbnails = [{
- 'url': t['url'],
- 'id': thumbnail_id,
- 'width': int_or_none(t.get('width')),
- 'height': int_or_none(t.get('height'))
- } for thumbnail_id, t in media.get('images', {}).items()]
-
- comments = [{
- 'id': comment.get('id'),
- 'text': comment['text'],
- 'timestamp': int_or_none(comment.get('created_time')),
- 'author': comment.get('from', {}).get('full_name'),
- 'author_id': comment.get('from', {}).get('username'),
- } for comment in media.get('comments', {}).get('data', []) if 'text' in comment]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'comment_count': comment_count,
- 'like_count': like_count,
- 'formats': formats,
- 'comments': comments,
- }
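
The extractor above leans on youtube-dl's get_element_by_id helper to lift a JSON blob out of the page before parsing it. A minimal sketch of that pattern, using made-up markup (the snippet and field names are illustrative, not Iconosquare's real payload):

    import json

    from youtube_dl.utils import get_element_by_id

    # Hypothetical page fragment; real pages embed a much larger object
    html = '<script id="mediaJson">{"videos": {"standard": {"url": "http://example.com/v.mp4"}}}</script>'

    media = json.loads(get_element_by_id('mediaJson', html))
    print(media['videos']['standard']['url'])  # http://example.com/v.mp4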
diff --git a/youtube_dl/extractor/imdb.py b/youtube_dl/extractor/imdb.py
deleted file mode 100644
index 436759da5..000000000
--- a/youtube_dl/extractor/imdb.py
+++ /dev/null
@@ -1,121 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- mimetype2ext,
- parse_duration,
- qualities,
- url_or_none,
-)
-
-
-class ImdbIE(InfoExtractor):
- IE_NAME = 'imdb'
- IE_DESC = 'Internet Movie Database trailers'
- _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://www.imdb.com/video/imdb/vi2524815897',
- 'info_dict': {
- 'id': '2524815897',
- 'ext': 'mp4',
- 'title': 'No. 2 from Ice Age: Continental Drift (2012)',
- 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
- }
- }, {
- 'url': 'http://www.imdb.com/video/_/vi2524815897',
- 'only_matching': True,
- }, {
- 'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897',
- 'only_matching': True,
- }, {
- 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
- 'only_matching': True,
- }, {
- 'url': 'http://www.imdb.com/videoplayer/vi1562949145',
- 'only_matching': True,
- }, {
- 'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
- 'only_matching': True,
- }, {
- 'url': 'https://www.imdb.com/list/ls009921623/videoplayer/vi260482329',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'https://www.imdb.com/videoplayer/vi' + video_id, video_id)
- video_metadata = self._parse_json(self._search_regex(
- r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage,
- 'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id]
- title = self._html_search_meta(
- ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
- r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title']
-
- quality = qualities(('SD', '480p', '720p', '1080p'))
- formats = []
- for encoding in video_metadata.get('encodings', []):
- if not encoding or not isinstance(encoding, dict):
- continue
- video_url = url_or_none(encoding.get('videoUrl'))
- if not video_url:
- continue
- ext = mimetype2ext(encoding.get(
- 'mimeType')) or determine_ext(video_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- continue
- format_id = encoding.get('definition')
- formats.append({
- 'format_id': format_id,
- 'url': video_url,
- 'ext': ext,
- 'quality': quality(format_id),
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'description': video_metadata.get('description'),
- 'thumbnail': video_metadata.get('slate', {}).get('url'),
- 'duration': parse_duration(video_metadata.get('duration')),
- }
-
-
-class ImdbListIE(InfoExtractor):
- IE_NAME = 'imdb:list'
- IE_DESC = 'Internet Movie Database lists'
- _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/ls(?P<id>\d{9})(?!/videoplayer/vi\d+)'
- _TEST = {
- 'url': 'https://www.imdb.com/list/ls009921623/',
- 'info_dict': {
- 'id': '009921623',
- 'title': 'The Bourne Legacy',
- 'description': 'A list of trailers, clips, and more from The Bourne Legacy, starring Jeremy Renner and Rachel Weisz.',
- },
- 'playlist_count': 8,
- }
-
- def _real_extract(self, url):
- list_id = self._match_id(url)
- webpage = self._download_webpage(url, list_id)
- entries = [
- self.url_result('http://www.imdb.com' + m, 'Imdb')
- for m in re.findall(r'href="(/list/ls%s/videoplayer/vi[^"]+)"' % list_id, webpage)]
-
- list_title = self._html_search_regex(
- r'<h1[^>]+class="[^"]*header[^"]*"[^>]*>(.*?)</h1>',
- webpage, 'list title')
- list_description = self._html_search_regex(
- r'<div[^>]+class="[^"]*list-description[^"]*"[^>]*><p>(.*?)</p>',
- webpage, 'list description')
-
- return self.playlist_result(entries, list_id, list_title, list_description)
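
The quality callable used above comes from youtube_dl.utils.qualities, which turns an ordered tuple of format ids into a ranking function; a sketch of the idea (not the exact library source):

    def qualities(quality_ids):
        # Lower-quality ids come first, so an id's index doubles as its
        # rank; unknown ids sort below everything at -1
        def q(qid):
            try:
                return quality_ids.index(qid)
            except ValueError:
                return -1
        return q

    quality = qualities(('SD', '480p', '720p', '1080p'))
    print(quality('720p'))     # 2
    print(quality('unknown'))  # -1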
diff --git a/youtube_dl/extractor/imgur.py b/youtube_dl/extractor/imgur.py
deleted file mode 100644
index a5ba03efa..000000000
--- a/youtube_dl/extractor/imgur.py
+++ /dev/null
@@ -1,154 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- js_to_json,
- mimetype2ext,
- ExtractorError,
-)
-
-
-class ImgurIE(InfoExtractor):
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
-
- _TESTS = [{
- 'url': 'https://i.imgur.com/A61SaA1.gifv',
- 'info_dict': {
- 'id': 'A61SaA1',
- 'ext': 'mp4',
- 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
- },
- }, {
- 'url': 'https://imgur.com/A61SaA1',
- 'only_matching': True,
- }, {
- 'url': 'https://i.imgur.com/crGpqCV.mp4',
- 'only_matching': True,
- }, {
- # no title
- 'url': 'https://i.imgur.com/jxBXAMC.gifv',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
-
- width = int_or_none(self._og_search_property(
- 'video:width', webpage, default=None))
- height = int_or_none(self._og_search_property(
- 'video:height', webpage, default=None))
-
- video_elements = self._search_regex(
- r'(?s)<div class="video-elements">(.*?)</div>',
- webpage, 'video elements', default=None)
- if not video_elements:
- raise ExtractorError(
- 'No sources found for video %s. Maybe an image?' % video_id,
- expected=True)
-
- formats = []
- for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
- formats.append({
- 'format_id': m.group('type').partition('/')[2],
- 'url': self._proto_relative_url(m.group('src')),
- 'ext': mimetype2ext(m.group('type')),
- 'width': width,
- 'height': height,
- 'http_headers': {
- 'User-Agent': 'youtube-dl (like wget)',
- },
- })
-
- gif_json = self._search_regex(
- r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
- webpage, 'GIF code', fatal=False)
- if gif_json:
- gifd = self._parse_json(
- gif_json, video_id, transform_source=js_to_json)
- formats.append({
- 'format_id': 'gif',
- 'preference': -10,
- 'width': width,
- 'height': height,
- 'ext': 'gif',
- 'acodec': 'none',
- 'vcodec': 'gif',
- 'container': 'gif',
- 'url': self._proto_relative_url(gifd['gifUrl']),
- 'filesize': gifd.get('size'),
- 'http_headers': {
- 'User-Agent': 'youtube-dl (like wget)',
- },
- })
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': self._og_search_title(webpage, default=video_id),
- }
-
-
-class ImgurGalleryIE(InfoExtractor):
- IE_NAME = 'imgur:gallery'
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
-
- _TESTS = [{
- 'url': 'http://imgur.com/gallery/Q95ko',
- 'info_dict': {
- 'id': 'Q95ko',
- 'title': 'Adding faces make every GIF better',
- },
- 'playlist_count': 25,
- }, {
- 'url': 'http://imgur.com/topic/Aww/ll5Vk',
- 'only_matching': True,
- }, {
- 'url': 'https://imgur.com/gallery/YcAQlkx',
- 'info_dict': {
- 'id': 'YcAQlkx',
- 'ext': 'mp4',
- 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
- }
- }, {
- 'url': 'http://imgur.com/topic/Funny/N8rOudd',
- 'only_matching': True,
- }, {
- 'url': 'http://imgur.com/r/aww/VQcQPhM',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- gallery_id = self._match_id(url)
-
- data = self._download_json(
- 'https://imgur.com/gallery/%s.json' % gallery_id,
- gallery_id)['data']['image']
-
- if data.get('is_album'):
- entries = [
- self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
- for image in data['album_images']['images'] if image.get('hash')]
- return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
-
- return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
-
-
-class ImgurAlbumIE(ImgurGalleryIE):
- IE_NAME = 'imgur:album'
- _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
-
- _TESTS = [{
- 'url': 'http://imgur.com/a/j6Orj',
- 'info_dict': {
- 'id': 'j6Orj',
- 'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
- },
- 'playlist_count': 12,
- }]
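
The format loop in ImgurIE splits each <source> tag's MIME type to derive a format id. Run against a made-up markup fragment, it behaves like this:

    import re

    video_elements = '<source src="//i.imgur.com/A61SaA1.mp4" type="video/mp4">'
    for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
        # 'video/mp4'.partition('/')[2] -> 'mp4', used as the format_id
        print(m.group('type').partition('/')[2], m.group('src'))
    # mp4 //i.imgur.com/A61SaA1.mp4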
diff --git a/youtube_dl/extractor/indavideo.py b/youtube_dl/extractor/indavideo.py
deleted file mode 100644
index 2b5b2b5b0..000000000
--- a/youtube_dl/extractor/indavideo.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- parse_age_limit,
- parse_iso8601,
- update_url_query,
-)
-
-
-class IndavideoEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
- _TESTS = [{
- 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
- 'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
- 'info_dict': {
- 'id': '1837039',
- 'ext': 'mp4',
- 'title': 'Cicatánc',
- 'description': '',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'cukiajanlo',
- 'uploader_id': '83729',
- 'timestamp': 1439193826,
- 'upload_date': '20150810',
- 'duration': 72,
- 'age_limit': 0,
- 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
- },
- }, {
- 'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
- 'only_matching': True,
- }, {
- 'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
- 'only_matching': True,
- }]
-
-    # Some example URLs covered by the generic extractor:
- # http://indavideo.hu/video/Vicces_cica_1
- # http://index.indavideo.hu/video/2015_0728_beregszasz
- # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
- # http://erotika.indavideo.hu/video/Amator_tini_punci
- # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
- # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
- webpage)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video = self._download_json(
- 'http://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
- video_id)['data']
-
- title = video['title']
-
- video_urls = []
-
- video_files = video.get('video_files')
- if isinstance(video_files, list):
- video_urls.extend(video_files)
- elif isinstance(video_files, dict):
- video_urls.extend(video_files.values())
-
-        video_file = video.get('video_file')
-        if video_file:
-            video_urls.append(video_file)
- video_urls = list(set(video_urls))
-
- video_prefix = video_urls[0].rsplit('/', 1)[0]
-
- for flv_file in video.get('flv_files', []):
- flv_url = '%s/%s' % (video_prefix, flv_file)
- if flv_url not in video_urls:
- video_urls.append(flv_url)
-
- filesh = video.get('filesh')
-
- formats = []
- for video_url in video_urls:
- height = int_or_none(self._search_regex(
- r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
- if filesh:
- if not height:
- continue
- token = filesh.get(compat_str(height))
- if token is None:
- continue
- video_url = update_url_query(video_url, {'token': token})
- formats.append({
- 'url': video_url,
- 'height': height,
- })
- self._sort_formats(formats)
-
- timestamp = video.get('date')
- if timestamp:
- # upload date is in CEST
- timestamp = parse_iso8601(timestamp + ' +0200', ' ')
-
- thumbnails = [{
- 'url': self._proto_relative_url(thumbnail)
- } for thumbnail in video.get('thumbnails', [])]
-
- tags = [tag['title'] for tag in video.get('tags') or []]
-
- return {
- 'id': video.get('id') or video_id,
- 'title': title,
- 'description': video.get('description'),
- 'thumbnails': thumbnails,
- 'uploader': video.get('user_name'),
- 'uploader_id': video.get('user_id'),
- 'timestamp': timestamp,
- 'duration': int_or_none(video.get('length')),
- 'age_limit': parse_age_limit(video.get('age_limit')),
- 'tags': tags,
- 'formats': formats,
- }
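
The per-height token above is attached with youtube_dl.utils.update_url_query, which merges extra parameters into an existing URL. A minimal sketch (illustrative URL; parameter order may differ on older Pythons):

    from youtube_dl.utils import update_url_query

    video_url = 'http://example.com/video.720.mp4?start=0'
    print(update_url_query(video_url, {'token': 'abc123'}))
    # http://example.com/video.720.mp4?start=0&token=abc123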
diff --git a/youtube_dl/extractor/internetvideoarchive.py b/youtube_dl/extractor/internetvideoarchive.py
deleted file mode 100644
index 76cc5ec3e..000000000
--- a/youtube_dl/extractor/internetvideoarchive.py
+++ /dev/null
@@ -1,100 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urlparse,
-)
-from ..utils import (
- determine_ext,
- int_or_none,
- xpath_text,
-)
-
-
-class InternetVideoArchiveIE(InfoExtractor):
- _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'
-
- _TEST = {
- 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
- 'info_dict': {
- 'id': '194487',
- 'ext': 'mp4',
- 'title': 'KICK-ASS 2',
- 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- @staticmethod
- def _build_json_url(query):
- return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
-
- @staticmethod
- def _build_xml_url(query):
- return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
-
- def _real_extract(self, url):
- query = compat_urlparse.urlparse(url).query
- query_dic = compat_parse_qs(query)
- video_id = query_dic['publishedid'][0]
-
- if '/player/' in url:
- configuration = self._download_json(url, video_id)
-
-            # There are multiple videos in the playlist while only the first one
-            # matches the video played in browsers
- video_info = configuration['playlist'][0]
- title = video_info['title']
-
- formats = []
- for source in video_info['sources']:
- file_url = source['file']
- if determine_ext(file_url) == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- file_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)
- if m3u8_formats:
- formats.extend(m3u8_formats)
- file_url = m3u8_formats[0]['url']
- formats.extend(self._extract_f4m_formats(
- file_url.replace('.m3u8', '.f4m'),
- video_id, f4m_id='hds', fatal=False))
- formats.extend(self._extract_mpd_formats(
- file_url.replace('.m3u8', '.mpd'),
- video_id, mpd_id='dash', fatal=False))
- else:
- a_format = {
- 'url': file_url,
- }
-
- if source.get('label') and source['label'][-4:] == ' kbs':
- tbr = int_or_none(source['label'][:-4])
- a_format.update({
- 'tbr': tbr,
- 'format_id': 'http-%d' % tbr,
- })
- formats.append(a_format)
-
- self._sort_formats(formats)
-
- description = video_info.get('description')
- thumbnail = video_info.get('image')
- else:
- configuration = self._download_xml(url, video_id)
- formats = [{
- 'url': xpath_text(configuration, './file', 'file URL', fatal=True),
- }]
- thumbnail = xpath_text(configuration, './image', 'thumbnail')
- title = 'InternetVideoArchive video %s' % video_id
- description = None
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnail': thumbnail,
- 'description': description,
- }
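
The video id above is read straight out of the query string; compat_parse_qs maps every parameter to a list of values, hence the trailing [0]. In isolation, with a shortened example URL:

    from youtube_dl.compat import compat_parse_qs, compat_urlparse

    url = 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487'
    query_dic = compat_parse_qs(compat_urlparse.urlparse(url).query)
    print(query_dic['publishedid'][0])  # 194487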
diff --git a/youtube_dl/extractor/iprima.py b/youtube_dl/extractor/iprima.py
deleted file mode 100644
index 11bbeb592..000000000
--- a/youtube_dl/extractor/iprima.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import time
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- js_to_json,
-)
-
-
-class IPrimaIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _GEO_BYPASS = False
-
- _TESTS = [{
- 'url': 'http://play.iprima.cz/gondici-s-r-o-33',
- 'info_dict': {
- 'id': 'p136534',
- 'ext': 'mp4',
- 'title': 'Gondíci s. r. o. (34)',
- 'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
- },
- 'params': {
- 'skip_download': True, # m3u8 download
- },
- }, {
- 'url': 'http://play.iprima.cz/particka/particka-92',
- 'only_matching': True,
- }, {
- # geo restricted
- 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
- 'only_matching': True,
- }, {
- # iframe api.play-backend.iprima.cz
- 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
- 'only_matching': True,
- }, {
- # iframe prima.iprima.cz
- 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
- 'only_matching': True,
- }, {
- 'url': 'http://www.iprima.cz/filmy/desne-rande',
- 'only_matching': True,
- }, {
- 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
- 'only_matching': True,
- }, {
- 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
- 'only_matching': True,
- }, {
- 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
- 'only_matching': True,
- }, {
- 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
- 'only_matching': True,
- }, {
- 'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
-
- webpage = self._download_webpage(url, video_id)
-
- video_id = self._search_regex(
- (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
- r'data-product="([^"]+)">'),
- webpage, 'real id')
-
- playerpage = self._download_webpage(
- 'http://play.iprima.cz/prehravac/init',
- video_id, note='Downloading player', query={
- '_infuse': 1,
- '_ts': round(time.time()),
- 'productId': video_id,
- }, headers={'Referer': url})
-
- formats = []
-
- def extract_formats(format_url, format_key=None, lang=None):
- ext = determine_ext(format_url)
- new_formats = []
- if format_key == 'hls' or ext == 'm3u8':
- new_formats = self._extract_m3u8_formats(
- format_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False)
-            elif format_key == 'dash' or ext == 'mpd':
-                return  # bare return skips DASH; the MPD call below is unreachable
-                new_formats = self._extract_mpd_formats(
-                    format_url, video_id, mpd_id='dash', fatal=False)
- if lang:
- for f in new_formats:
- if not f.get('language'):
- f['language'] = lang
- formats.extend(new_formats)
-
- options = self._parse_json(
- self._search_regex(
- r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
- playerpage, 'player options', default='{}'),
- video_id, transform_source=js_to_json, fatal=False)
- if options:
- for key, tracks in options.get('tracks', {}).items():
- if not isinstance(tracks, list):
- continue
- for track in tracks:
- src = track.get('src')
- if src:
- extract_formats(src, key.lower(), track.get('lang'))
-
- if not formats:
- for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
- extract_formats(src)
-
- if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
- self.raise_geo_restricted(countries=['CZ'])
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': self._og_search_title(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'formats': formats,
- 'description': self._og_search_description(webpage),
- }
diff --git a/youtube_dl/extractor/ivi.py b/youtube_dl/extractor/ivi.py
deleted file mode 100644
index 86c014b07..000000000
--- a/youtube_dl/extractor/ivi.py
+++ /dev/null
@@ -1,220 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import json
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- qualities,
-)
-
-
-class IviIE(InfoExtractor):
- IE_DESC = 'ivi.ru'
- IE_NAME = 'ivi'
- _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
- _GEO_BYPASS = False
- _GEO_COUNTRIES = ['RU']
-
- _TESTS = [
- # Single movie
- {
- 'url': 'http://www.ivi.ru/watch/53141',
- 'md5': '6ff5be2254e796ed346251d117196cf4',
- 'info_dict': {
- 'id': '53141',
- 'ext': 'mp4',
- 'title': 'Иван Васильевич меняет профессию',
- 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
- 'duration': 5498,
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- 'skip': 'Only works from Russia',
- },
- # Serial's series
- {
- 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
- 'md5': '221f56b35e3ed815fde2df71032f4b3e',
- 'info_dict': {
- 'id': '9549',
- 'ext': 'mp4',
- 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
- 'series': 'Двое из ларца',
- 'season': 'Сезон 1',
- 'season_number': 1,
- 'episode': 'Дело Гольдберга (1 часть)',
- 'episode_number': 1,
- 'duration': 2655,
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- 'skip': 'Only works from Russia',
- },
- {
- # with MP4-HD720 format
- 'url': 'http://www.ivi.ru/watch/146500',
- 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
- 'info_dict': {
- 'id': '146500',
- 'ext': 'mp4',
- 'title': 'Кукла',
- 'description': 'md5:ffca9372399976a2d260a407cc74cce6',
- 'duration': 5599,
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- 'skip': 'Only works from Russia',
- },
- {
- 'url': 'https://www.ivi.tv/watch/33560/',
- 'only_matching': True,
- },
- ]
-
- # Sorted by quality
- _KNOWN_FORMATS = (
- 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
- 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- data = {
- 'method': 'da.content.get',
- 'params': [
- video_id, {
- 'site': 's183',
- 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
- 'contentid': video_id
- }
- ]
- }
-
- video_json = self._download_json(
- 'http://api.digitalaccess.ru/api/json/', video_id,
- 'Downloading video JSON', data=json.dumps(data))
-
- if 'error' in video_json:
- error = video_json['error']
- origin = error['origin']
- if origin == 'NotAllowedForLocation':
- self.raise_geo_restricted(
- msg=error['message'], countries=self._GEO_COUNTRIES)
- elif origin == 'NoRedisValidData':
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
- raise ExtractorError(
- 'Unable to download video %s: %s' % (video_id, error['message']),
- expected=True)
-
- result = video_json['result']
-
- quality = qualities(self._KNOWN_FORMATS)
-
- formats = [{
- 'url': x['url'],
- 'format_id': x.get('content_format'),
- 'quality': quality(x.get('content_format')),
- } for x in result['files'] if x.get('url')]
-
- self._sort_formats(formats)
-
- title = result['title']
-
- duration = int_or_none(result.get('duration'))
- compilation = result.get('compilation')
- episode = title if compilation else None
-
- title = '%s - %s' % (compilation, title) if compilation is not None else title
-
- thumbnails = [{
- 'url': preview['url'],
- 'id': preview.get('content_format'),
- } for preview in result.get('preview', []) if preview.get('url')]
-
- webpage = self._download_webpage(url, video_id)
-
- season = self._search_regex(
- r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
- webpage, 'season', default=None)
- season_number = int_or_none(self._search_regex(
- r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
- webpage, 'season number', default=None))
-
- episode_number = int_or_none(self._search_regex(
- r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
- webpage, 'episode number', default=None))
-
- description = self._og_search_description(webpage, default=None) or self._html_search_meta(
- 'description', webpage, 'description', default=None)
-
- return {
- 'id': video_id,
- 'title': title,
- 'series': compilation,
- 'season': season,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- 'thumbnails': thumbnails,
- 'description': description,
- 'duration': duration,
- 'formats': formats,
- }
-
-
-class IviCompilationIE(InfoExtractor):
- IE_DESC = 'ivi.ru compilations'
- IE_NAME = 'ivi:compilation'
- _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
- _TESTS = [{
- 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
- 'info_dict': {
- 'id': 'dvoe_iz_lartsa',
- 'title': 'Двое из ларца (2006 - 2008)',
- },
- 'playlist_mincount': 24,
- }, {
- 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
- 'info_dict': {
- 'id': 'dvoe_iz_lartsa/season1',
- 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
- },
- 'playlist_mincount': 12,
- }]
-
- def _extract_entries(self, html, compilation_id):
- return [
- self.url_result(
- 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
- for serie in re.findall(
- r'<a href="/watch/%s/(\d+)"[^>]+data-id="\1"' % compilation_id, html)]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- compilation_id = mobj.group('compilationid')
- season_id = mobj.group('seasonid')
-
- if season_id is not None: # Season link
- season_page = self._download_webpage(
- url, compilation_id, 'Downloading season %s web page' % season_id)
- playlist_id = '%s/season%s' % (compilation_id, season_id)
- playlist_title = self._html_search_meta('title', season_page, 'title')
- entries = self._extract_entries(season_page, compilation_id)
- else: # Compilation link
- compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
- playlist_id = compilation_id
- playlist_title = self._html_search_meta('title', compilation_page, 'title')
- seasons = re.findall(
- r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
- if not seasons: # No seasons in this compilation
- entries = self._extract_entries(compilation_page, compilation_id)
- else:
- entries = []
- for season_id in seasons:
- season_page = self._download_webpage(
- 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
- compilation_id, 'Downloading season %s web page' % season_id)
- entries.extend(self._extract_entries(season_page, compilation_id))
-
- return self.playlist_result(entries, playlist_id, playlist_title)
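
Unlike most extractors in this diff, IviIE POSTs a JSON-RPC-style envelope; passing data= to _download_json is what turns the request into a POST. A standalone sketch of the payload construction (video id taken from the first test above):

    import json

    video_id = '53141'
    payload = {
        'method': 'da.content.get',
        'params': [
            video_id, {
                'site': 's183',
                'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
                'contentid': video_id,
            },
        ],
    }
    # _download_json(..., data=json.dumps(payload)) sends this as the body
    print(json.dumps(payload))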
diff --git a/youtube_dl/extractor/jamendo.py b/youtube_dl/extractor/jamendo.py
deleted file mode 100644
index c21827618..000000000
--- a/youtube_dl/extractor/jamendo.py
+++ /dev/null
@@ -1,150 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from ..compat import compat_urlparse
-from .common import InfoExtractor
-from ..utils import parse_duration
-
-
-class JamendoBaseIE(InfoExtractor):
- def _extract_meta(self, webpage, fatal=True):
- title = self._og_search_title(
- webpage, default=None) or self._search_regex(
- r'<title>([^<]+)', webpage,
- 'title', default=None)
- if title:
- title = self._search_regex(
- r'(.+?)\s*\|\s*Jamendo Music', title, 'title', default=None)
- if not title:
- title = self._html_search_meta(
- 'name', webpage, 'title', fatal=fatal)
- mobj = re.search(r'(.+) - (.+)', title or '')
- artist, second = mobj.groups() if mobj else [None] * 2
- return title, artist, second
-
-
-class JamendoIE(JamendoBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:
- licensing\.jamendo\.com/[^/]+|
- (?:www\.)?jamendo\.com
- )
- /track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)
- '''
- _TESTS = [{
- 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
- 'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
- 'info_dict': {
- 'id': '196219',
- 'display_id': 'stories-from-emona-i',
- 'ext': 'flac',
- 'title': 'Maya Filipič - Stories from Emona I',
- 'artist': 'Maya Filipič',
- 'track': 'Stories from Emona I',
- 'duration': 210,
- 'thumbnail': r're:^https?://.*\.jpg'
- }
- }, {
- 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = self._VALID_URL_RE.match(url)
- track_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(
- 'https://www.jamendo.com/track/%s/%s' % (track_id, display_id),
- display_id)
-
- title, artist, track = self._extract_meta(webpage)
-
- formats = [{
- 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
- % (sub_domain, track_id, format_id),
- 'format_id': format_id,
- 'ext': ext,
- 'quality': quality,
- } for quality, (format_id, sub_domain, ext) in enumerate((
- ('mp31', 'mp3l', 'mp3'),
- ('mp32', 'mp3d', 'mp3'),
- ('ogg1', 'ogg', 'ogg'),
- ('flac', 'flac', 'flac'),
- ))]
- self._sort_formats(formats)
-
- thumbnail = self._html_search_meta(
- 'image', webpage, 'thumbnail', fatal=False)
- duration = parse_duration(self._search_regex(
- r'<span[^>]+itemprop=["\']duration["\'][^>]+content=["\'](.+?)["\']',
- webpage, 'duration', fatal=False))
-
- return {
- 'id': track_id,
- 'display_id': display_id,
- 'thumbnail': thumbnail,
- 'title': title,
- 'duration': duration,
- 'artist': artist,
- 'track': track,
- 'formats': formats
- }
-
-
-class JamendoAlbumIE(JamendoBaseIE):
- _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)'
- _TEST = {
- 'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
- 'info_dict': {
- 'id': '121486',
- 'title': 'Shearer - Duck On Cover'
- },
- 'playlist': [{
- 'md5': 'e1a2fcb42bda30dfac990212924149a8',
- 'info_dict': {
- 'id': '1032333',
- 'ext': 'flac',
- 'title': 'Shearer - Warmachine',
- 'artist': 'Shearer',
- 'track': 'Warmachine',
- }
- }, {
- 'md5': '1f358d7b2f98edfe90fd55dac0799d50',
- 'info_dict': {
- 'id': '1032330',
- 'ext': 'flac',
- 'title': 'Shearer - Without Your Ghost',
- 'artist': 'Shearer',
- 'track': 'Without Your Ghost',
- }
- }],
- 'params': {
- 'playlistend': 2
- }
- }
-
- def _real_extract(self, url):
- mobj = self._VALID_URL_RE.match(url)
- album_id = mobj.group('id')
-
- webpage = self._download_webpage(url, mobj.group('display_id'))
-
- title, artist, album = self._extract_meta(webpage, fatal=False)
-
- entries = [{
- '_type': 'url_transparent',
- 'url': compat_urlparse.urljoin(url, m.group('path')),
- 'ie_key': JamendoIE.ie_key(),
- 'id': self._search_regex(
- r'/track/(\d+)', m.group('path'), 'track id', default=None),
- 'artist': artist,
- 'album': album,
- } for m in re.finditer(
- r'<a[^>]+href=(["\'])(?P<path>(?:(?!\1).)+)\1[^>]+class=["\'][^>]*js-trackrow-albumpage-link',
- webpage)]
-
- return self.playlist_result(entries, album_id, title)
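
The Jamendo format list encodes quality positionally: enumerate hands each (format_id, sub_domain, ext) tuple an ascending index that _sort_formats later uses as the relative quality. In isolation:

    FORMATS = (
        ('mp31', 'mp3l', 'mp3'),
        ('mp32', 'mp3d', 'mp3'),
        ('ogg1', 'ogg', 'ogg'),
        ('flac', 'flac', 'flac'),
    )
    for quality, (format_id, sub_domain, ext) in enumerate(FORMATS):
        print(quality, format_id)
    # flac ends up with quality 3, i.e. preferred after sorting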
diff --git a/youtube_dl/extractor/joj.py b/youtube_dl/extractor/joj.py
deleted file mode 100644
index 62b28e980..000000000
--- a/youtube_dl/extractor/joj.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- js_to_json,
- try_get,
-)
-
-
-class JojIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?:
- joj:|
- https?://media\.joj\.sk/embed/
- )
- (?P<id>[^/?#^]+)
- '''
- _TESTS = [{
- 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
- 'info_dict': {
- 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
- 'ext': 'mp4',
- 'title': 'NOVÉ BÝVANIE',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 3118,
- }
- }, {
- 'url': 'https://media.joj.sk/embed/9i1cxv',
- 'only_matching': True,
- }, {
- 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
- 'only_matching': True,
- }, {
- 'url': 'joj:9i1cxv',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
- webpage)]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://media.joj.sk/embed/%s' % video_id, video_id)
-
- title = self._search_regex(
- (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
- r'<title>(?P<title>[^<]+)'), webpage, 'title',
- default=None, group='title') or self._og_search_title(webpage)
-
- bitrates = self._parse_json(
- self._search_regex(
- r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
- default='{}'),
- video_id, transform_source=js_to_json, fatal=False)
-
- formats = []
- for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
- if isinstance(format_url, compat_str):
- height = self._search_regex(
- r'(\d+)[pP]\.', format_url, 'height', default=None)
- formats.append({
- 'url': format_url,
- 'format_id': '%sp' % height if height else None,
-                    'height': int_or_none(height),
- })
- if not formats:
- playlist = self._download_xml(
- 'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
- video_id)
- for file_el in playlist.findall('./files/file'):
- path = file_el.get('path')
- if not path:
- continue
- format_id = file_el.get('id') or file_el.get('label')
- formats.append({
- 'url': 'http://n16.joj.sk/storage/%s' % path.replace(
- 'dat/', '', 1),
- 'format_id': format_id,
- 'height': int_or_none(self._search_regex(
- r'(\d+)[pP]', format_id or path, 'height',
- default=None)),
- })
- self._sort_formats(formats)
-
- thumbnail = self._og_search_thumbnail(webpage)
-
- duration = int_or_none(self._search_regex(
- r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- }
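
When the JSON bitrates are missing, JojIE falls back to an XML playlist and rewrites each file path onto the storage host. A sketch of that fallback against a made-up playlist document shaped like the response the code expects:

    import xml.etree.ElementTree as ET

    playlist = ET.fromstring(
        '<playlist><files><file id="720p" path="dat/2017/clip.mp4"/></files></playlist>')
    for file_el in playlist.findall('./files/file'):
        path = file_el.get('path')
        # only the first 'dat/' segment is dropped, as in the extractor
        print(file_el.get('id'), 'http://n16.joj.sk/storage/' + path.replace('dat/', '', 1))
    # 720p http://n16.joj.sk/storage/2017/clip.mp4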
diff --git a/youtube_dl/extractor/jpopsukitv.py b/youtube_dl/extractor/jpopsukitv.py
deleted file mode 100644
index 4b5f346d1..000000000
--- a/youtube_dl/extractor/jpopsukitv.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_strdate,
-)
-
-
-class JpopsukiIE(InfoExtractor):
- IE_NAME = 'jpopsuki.tv'
- _VALID_URL = r'https?://(?:www\.)?jpopsuki\.tv/(?:category/)?video/[^/]+/(?P<id>\S+)'
-
- _TEST = {
- 'url': 'http://www.jpopsuki.tv/video/ayumi-hamasaki---evolution/00be659d23b0b40508169cdee4545771',
- 'md5': '88018c0c1a9b1387940e90ec9e7e198e',
- 'info_dict': {
- 'id': '00be659d23b0b40508169cdee4545771',
- 'ext': 'mp4',
- 'title': 'ayumi hamasaki - evolution',
- 'description': 'Release date: 2001.01.31\r\n浜崎あゆみ - evolution',
- 'thumbnail': 'http://www.jpopsuki.tv/cache/89722c74d2a2ebe58bcac65321c115b2.jpg',
- 'uploader': 'plama_chan',
- 'uploader_id': '404',
- 'upload_date': '20121101'
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_url = 'http://www.jpopsuki.tv' + self._html_search_regex(
- r'<source src="(.*?)" type', webpage, 'video url')
-
- video_title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- uploader = self._html_search_regex(
- r'<li>from: <a href="/user/view/user/(.*?)/uid/',
- webpage, 'video uploader', fatal=False)
- uploader_id = self._html_search_regex(
- r'<li>from: <a href="/user/view/user/\S*?/uid/(\d*)',
- webpage, 'video uploader_id', fatal=False)
- upload_date = unified_strdate(self._html_search_regex(
- r'<li>uploaded: (.*?)</li>', webpage, 'video upload_date',
- fatal=False))
- view_count_str = self._html_search_regex(
- r'<li>Hits: ([0-9]+?)</li>', webpage, 'video view_count',
- fatal=False)
- comment_count_str = self._html_search_regex(
- r'<h2>([0-9]+?) comments</h2>', webpage, 'video comment_count',
- fatal=False)
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': video_title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'upload_date': upload_date,
- 'view_count': int_or_none(view_count_str),
- 'comment_count': int_or_none(comment_count_str),
- }
diff --git a/youtube_dl/extractor/jwplatform.py b/youtube_dl/extractor/jwplatform.py
deleted file mode 100644
index 2aabd98b5..000000000
--- a/youtube_dl/extractor/jwplatform.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class JWPlatformIE(InfoExtractor):
- _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
- _TESTS = [{
- 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
- 'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
- 'info_dict': {
- 'id': 'nPripu9l',
- 'ext': 'mov',
- 'title': 'Big Buck Bunny Trailer',
- 'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
- 'upload_date': '20081127',
- 'timestamp': 1227796140,
- }
- }, {
- 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_url(webpage):
- urls = JWPlatformIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
- webpage)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
- return self._parse_jwplayer_data(json_data, video_id)
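
_extract_urls is what the generic extractor calls to spot JW Platform embeds. Against a made-up page fragment:

    import re

    webpage = '<script src="//content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js"></script>'
    print(re.findall(
        r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//content\.jwplatform\.com/players/[a-zA-Z0-9]{8})',
        webpage))
    # ['//content.jwplatform.com/players/nPripu9l']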
diff --git a/youtube_dl/extractor/kakao.py b/youtube_dl/extractor/kakao.py
deleted file mode 100644
index 7fa140b0c..000000000
--- a/youtube_dl/extractor/kakao.py
+++ /dev/null
@@ -1,149 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- unified_timestamp,
- update_url_query,
-)
-
-
-class KakaoIE(InfoExtractor):
- _VALID_URL = r'https?://tv\.kakao\.com/channel/(?P<channel>\d+)/cliplink/(?P<id>\d+)'
- _API_BASE = 'http://tv.kakao.com/api/v1/ft/cliplinks'
-
- _TESTS = [{
- 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
- 'md5': '702b2fbdeb51ad82f5c904e8c0766340',
- 'info_dict': {
- 'id': '301965083',
- 'ext': 'mp4',
- 'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
- 'uploader_id': 2671005,
- 'uploader': '그랑그랑이',
- 'timestamp': 1488160199,
- 'upload_date': '20170227',
- }
- }, {
- 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
- 'md5': 'a8917742069a4dd442516b86e7d66529',
- 'info_dict': {
- 'id': '300103180',
- 'ext': 'mp4',
- 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
- 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
- 'uploader_id': 2653210,
- 'uploader': '쇼 음악중심',
- 'timestamp': 1485684628,
- 'upload_date': '20170129',
- }
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- player_header = {
- 'Referer': update_url_query(
- 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
- 'service': 'kakao_tv',
- 'autoplay': '1',
- 'profile': 'HIGH',
- 'wmode': 'transparent',
- })
- }
-
- QUERY_COMMON = {
- 'player': 'monet_html5',
- 'referer': url,
- 'uuid': '',
- 'service': 'kakao_tv',
- 'section': '',
- 'dteType': 'PC',
- }
-
- query = QUERY_COMMON.copy()
- query['fields'] = 'clipLink,clip,channel,hasPlusFriend,-service,-tagList'
- impress = self._download_json(
- '%s/%s/impress' % (self._API_BASE, video_id),
- video_id, 'Downloading video info',
- query=query, headers=player_header)
-
- clip_link = impress['clipLink']
- clip = clip_link['clip']
-
- title = clip.get('title') or clip_link.get('displayTitle')
-
- tid = impress.get('tid', '')
-
- query = QUERY_COMMON.copy()
- query.update({
- 'tid': tid,
- 'profile': 'HIGH',
- })
- raw = self._download_json(
- '%s/%s/raw' % (self._API_BASE, video_id),
- video_id, 'Downloading video formats info',
- query=query, headers=player_header)
-
- formats = []
- for fmt in raw.get('outputList', []):
- try:
- profile_name = fmt['profile']
- fmt_url_json = self._download_json(
- '%s/%s/raw/videolocation' % (self._API_BASE, video_id),
- video_id,
- 'Downloading video URL for profile %s' % profile_name,
- query={
- 'service': 'kakao_tv',
- 'section': '',
- 'tid': tid,
- 'profile': profile_name
- }, headers=player_header, fatal=False)
-
- if fmt_url_json is None:
- continue
-
- fmt_url = fmt_url_json['url']
- formats.append({
- 'url': fmt_url,
- 'format_id': profile_name,
- 'width': int_or_none(fmt.get('width')),
- 'height': int_or_none(fmt.get('height')),
- 'format_note': fmt.get('label'),
- 'filesize': int_or_none(fmt.get('filesize'))
- })
- except KeyError:
- pass
- self._sort_formats(formats)
-
- thumbs = []
- for thumb in clip.get('clipChapterThumbnailList', []):
- thumbs.append({
- 'url': thumb.get('thumbnailUrl'),
- 'id': compat_str(thumb.get('timeInSec')),
- 'preference': -1 if thumb.get('isDefault') else 0
- })
- top_thumbnail = clip.get('thumbnailUrl')
- if top_thumbnail:
- thumbs.append({
- 'url': top_thumbnail,
- 'preference': 10,
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': clip.get('description'),
- 'uploader': clip_link.get('channel', {}).get('name'),
- 'uploader_id': clip_link.get('channelId'),
- 'thumbnails': thumbs,
- 'timestamp': unified_timestamp(clip_link.get('createTime')),
- 'duration': int_or_none(clip.get('duration')),
- 'view_count': int_or_none(clip.get('playCount')),
- 'like_count': int_or_none(clip.get('likeCount')),
- 'comment_count': int_or_none(clip.get('commentCount')),
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/kaltura.py b/youtube_dl/extractor/kaltura.py
deleted file mode 100644
index 2d38b758b..000000000
--- a/youtube_dl/extractor/kaltura.py
+++ /dev/null
@@ -1,370 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import base64
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
- compat_parse_qs,
-)
-from ..utils import (
- clean_html,
- ExtractorError,
- int_or_none,
- unsmuggle_url,
- smuggle_url,
-)
-
-
-class KalturaIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?:
- kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
- https?://
- (:?(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
- (?:
- (?:
- # flash player
- index\.php/(?:kwidget|extwidget/preview)|
- # html5 player
- html5/html5lib/[^/]+/mwEmbedFrame\.php
- )
- )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
- )
- '''
- _SERVICE_URL = 'http://cdnapi.kaltura.com'
- _SERVICE_BASE = '/api_v3/index.php'
- # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php
- _CAPTION_TYPES = {
- 1: 'srt',
- 2: 'ttml',
- 3: 'vtt',
- }
- _TESTS = [
- {
- 'url': 'kaltura:269692:1_1jc2y3e4',
- 'md5': '3adcbdb3dcc02d647539e53f284ba171',
- 'info_dict': {
- 'id': '1_1jc2y3e4',
- 'ext': 'mp4',
- 'title': 'Straight from the Heart',
- 'upload_date': '20131219',
- 'uploader_id': 'mlundberg@wolfgangsvault.com',
- 'description': 'The Allman Brothers Band, 12/16/1981',
- 'thumbnail': 're:^https?://.*/thumbnail/.*',
- 'timestamp': int,
- },
- },
- {
- 'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
- 'only_matching': True,
- },
- {
- 'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
- 'only_matching': True,
- },
- {
- 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
- 'only_matching': True,
- },
- {
- # video with subtitles
- 'url': 'kaltura:111032:1_cw786r8q',
- 'only_matching': True,
- },
- {
- # video with ttml subtitles (no fileExt)
- 'url': 'kaltura:1926081:0_l5ye1133',
- 'info_dict': {
- 'id': '0_l5ye1133',
- 'ext': 'mp4',
- 'title': 'What Can You Do With Python?',
- 'upload_date': '20160221',
- 'uploader_id': 'stork',
- 'thumbnail': 're:^https?://.*/thumbnail/.*',
- 'timestamp': int,
- 'subtitles': {
- 'en': [{
- 'ext': 'ttml',
- }],
- },
- },
- 'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
- 'only_matching': True,
- },
- {
- 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
- 'only_matching': True,
- },
- {
- # unavailable source format
- 'url': 'kaltura:513551:1_66x4rg7o',
- 'only_matching': True,
- }
- ]
-
- @staticmethod
- def _extract_url(webpage):
- # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
- mobj = (
- re.search(
- r"""(?xs)
- kWidget\.(?:thumb)?[Ee]mbed\(
- \{.*?
- (?P<q1>['"])wid(?P=q1)\s*:\s*
- (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
- (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
- (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
- """, webpage)
- or re.search(
- r'''(?xs)
- (?P<q1>["'])
- (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
- (?P=q1).*?
- (?:
- (?:
- entry_?[Ii]d|
- (?P<q2>["'])entry_?[Ii]d(?P=q2)
- )\s*:\s*|
- \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
- )
- (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
- ''', webpage)
- or re.search(
- r'''(?xs)
- <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
- (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
- (?:(?!(?P=q1)).)*
- [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
- (?:(?!(?P=q1)).)*
- (?P=q1)
- ''', webpage)
- )
- if mobj:
- embed_info = mobj.groupdict()
- for k, v in embed_info.items():
- if v:
- embed_info[k] = v.strip()
- url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
- escaped_pid = re.escape(embed_info['partner_id'])
- service_mobj = re.search(
- r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
- webpage)
- if service_mobj:
- url = smuggle_url(url, {'service_url': service_mobj.group('id')})
- return url
-
- def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
- params = actions[0]
- if len(actions) > 1:
- for i, a in enumerate(actions[1:], start=1):
- for k, v in a.items():
- params['%d:%s' % (i, k)] = v
-
- data = self._download_json(
- (service_url or self._SERVICE_URL) + self._SERVICE_BASE,
- video_id, query=params, *args, **kwargs)
-
- status = data if len(actions) == 1 else data[0]
- if status.get('objectType') == 'KalturaAPIException':
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, status['message']))
-
- return data
-
- def _get_video_info(self, video_id, partner_id, service_url=None):
- actions = [
- {
- 'action': 'null',
- 'apiVersion': '3.1.5',
- 'clientTag': 'kdp:v3.8.5',
- 'format': 1, # JSON, 2 = XML, 3 = PHP
- 'service': 'multirequest',
- },
- {
- 'expiry': 86400,
- 'service': 'session',
- 'action': 'startWidgetSession',
- 'widgetId': '_%s' % partner_id,
- },
- {
- 'action': 'get',
- 'entryId': video_id,
- 'service': 'baseentry',
- 'ks': '{1:result:ks}',
- 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
- 'responseProfile:type': 1,
- },
- {
- 'action': 'getbyentryid',
- 'entryId': video_id,
- 'service': 'flavorAsset',
- 'ks': '{1:result:ks}',
- },
- {
- 'action': 'list',
- 'filter:entryIdEqual': video_id,
- 'service': 'caption_captionasset',
- 'ks': '{1:result:ks}',
- },
- ]
- return self._kaltura_api_call(
- video_id, actions, service_url, note='Downloading video info JSON')
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
-
- mobj = re.match(self._VALID_URL, url)
- partner_id, entry_id = mobj.group('partner_id', 'id')
- ks = None
- captions = None
- if partner_id and entry_id:
- _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
- else:
- path, query = mobj.group('path', 'query')
- if not path and not query:
- raise ExtractorError('Invalid URL', expected=True)
- params = {}
- if query:
- params = compat_parse_qs(query)
- if path:
- splitted_path = path.split('/')
-                    params.update(dict(zip(splitted_path[::2], [[v] for v in splitted_path[1::2]])))
- if 'wid' in params:
- partner_id = params['wid'][0][1:]
- elif 'p' in params:
- partner_id = params['p'][0]
- elif 'partner_id' in params:
- partner_id = params['partner_id'][0]
- else:
- raise ExtractorError('Invalid URL', expected=True)
- if 'entry_id' in params:
- entry_id = params['entry_id'][0]
- _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id)
- elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
- reference_id = params['flashvars[referenceId]'][0]
- webpage = self._download_webpage(url, reference_id)
- entry_data = self._parse_json(self._search_regex(
- r'window\.kalturaIframePackageData\s*=\s*({.*});',
- webpage, 'kalturaIframePackageData'),
- reference_id)['entryResult']
- info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
- entry_id = info['id']
-                # Unfortunately, the data returned in kalturaIframePackageData lacks
-                # captions, so we try requesting the complete data via the regular
-                # approach now that we know the entry_id
- try:
- _, info, flavor_assets, captions = self._get_video_info(
- entry_id, partner_id)
- except ExtractorError:
-                    # The regular approach failed, but everything except
-                    # captions has already been extracted, so proceed
-                    # with what we have
- pass
- else:
- raise ExtractorError('Invalid URL', expected=True)
- ks = params.get('flashvars[ks]', [None])[0]
-
- source_url = smuggled_data.get('source_url')
- if source_url:
- referrer = base64.b64encode(
- '://'.join(compat_urlparse.urlparse(source_url)[:2])
- .encode('utf-8')).decode('utf-8')
- else:
- referrer = None
-
- def sign_url(unsigned_url):
- if ks:
- unsigned_url += '/ks/%s' % ks
- if referrer:
- unsigned_url += '?referrer=%s' % referrer
- return unsigned_url
-
- data_url = info['dataUrl']
- if '/flvclipper/' in data_url:
- data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
-
- formats = []
- for f in flavor_assets:
- # Continue if asset is not ready
- if f.get('status') != 2:
- continue
-            # Skip original formats that are not available
-            # (e.g. kaltura:1926081:0_c03e1b5g) for now
- if f.get('fileExt') == 'chun':
- continue
- # DRM-protected video, cannot be decrypted
- if f.get('fileExt') == 'wvm':
- continue
- if not f.get('fileExt'):
- # QT indicates QuickTime; some videos have broken fileExt
- if f.get('containerFormat') == 'qt':
- f['fileExt'] = 'mov'
- else:
- f['fileExt'] = 'mp4'
- video_url = sign_url(
- '%s/flavorId/%s' % (data_url, f['id']))
- format_id = '%(fileExt)s-%(bitrate)s' % f
- # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o)
- if f.get('isOriginal') is True and not self._is_valid_url(
- video_url, entry_id, format_id):
- continue
- # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
- # -f mp4-56)
- vcodec = 'none' if 'videoCodecId' not in f and f.get(
- 'frameRate') == 0 else f.get('videoCodecId')
- formats.append({
- 'format_id': format_id,
- 'ext': f.get('fileExt'),
- 'tbr': int_or_none(f['bitrate']),
- 'fps': int_or_none(f.get('frameRate')),
- 'filesize_approx': int_or_none(f.get('size'), invscale=1024),
- 'container': f.get('containerFormat'),
- 'vcodec': vcodec,
- 'height': int_or_none(f.get('height')),
- 'width': int_or_none(f.get('width')),
- 'url': video_url,
- })
- if '/playManifest/' in data_url:
- m3u8_url = sign_url(data_url.replace(
- 'format/url', 'format/applehttp'))
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, entry_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
-
- self._sort_formats(formats)
-
- subtitles = {}
- if captions:
- for caption in captions.get('objects', []):
- # Continue if caption is not ready
- if caption.get('status') != 2:
- continue
- if not caption.get('id'):
- continue
- caption_format = int_or_none(caption.get('format'))
- subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({
- 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']),
- 'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml',
- })
-
- return {
- 'id': entry_id,
- 'title': info['name'],
- 'formats': formats,
- 'subtitles': subtitles,
- 'description': clean_html(info.get('description')),
- 'thumbnail': info.get('thumbnailUrl'),
- 'duration': info.get('duration'),
- 'timestamp': info.get('createdAt'),
- 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
- 'view_count': info.get('plays'),
- }
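
The least obvious part of _kaltura_api_call is how a list of actions becomes one flat query: every action after the first is folded in under an "<index>:" prefix, and placeholders like '{1:result:ks}' let later actions reference earlier results server-side. A self-contained sketch of the flattening:

    def flatten_actions(actions):
        # action 0 carries the common multirequest fields; actions 1..n
        # get their position as a prefix, per Kaltura's api_v3 convention
        params = dict(actions[0])
        for i, action in enumerate(actions[1:], start=1):
            for k, v in action.items():
                params['%d:%s' % (i, k)] = v
        return params

    actions = [
        {'service': 'multirequest', 'format': 1},
        {'service': 'session', 'action': 'startWidgetSession', 'widgetId': '_269692'},
        {'service': 'baseentry', 'action': 'get', 'ks': '{1:result:ks}'},
    ]
    print(flatten_actions(actions))
    # {'service': 'multirequest', 'format': 1, '1:service': 'session', ...}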
diff --git a/youtube_dl/extractor/keek.py b/youtube_dl/extractor/keek.py
deleted file mode 100644
index 94a03d277..000000000
--- a/youtube_dl/extractor/keek.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class KeekIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?keek\.com/keek/(?P<id>\w+)'
- IE_NAME = 'keek'
- _TEST = {
- 'url': 'https://www.keek.com/keek/NODfbab',
- 'md5': '9b0636f8c0f7614afa4ea5e4c6e57e83',
- 'info_dict': {
- 'id': 'NODfbab',
- 'ext': 'mp4',
- 'title': 'md5:35d42050a3ece241d5ddd7fdcc6fd896',
- 'uploader': 'ytdl',
- 'uploader_id': 'eGT5bab',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- return {
- 'id': video_id,
- 'url': self._og_search_video_url(webpage),
- 'ext': 'mp4',
- 'title': self._og_search_description(webpage).strip(),
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'uploader': self._search_regex(
- r'data-username=(["\'])(?P<uploader>.+?)\1', webpage,
- 'uploader', fatal=False, group='uploader'),
- 'uploader_id': self._search_regex(
- r'data-user-id=(["\'])(?P<uploader_id>.+?)\1', webpage,
- 'uploader id', fatal=False, group='uploader_id'),
- }
diff --git a/youtube_dl/extractor/kontrtube.py b/youtube_dl/extractor/kontrtube.py
deleted file mode 100644
index 1fda45107..000000000
--- a/youtube_dl/extractor/kontrtube.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
-)
-
-
-class KontrTubeIE(InfoExtractor):
- IE_NAME = 'kontrtube'
- IE_DESC = 'KontrTube.ru - Труба зовёт'
- _VALID_URL = r'https?://(?:www\.)?kontrtube\.ru/videos/(?P<id>\d+)/(?P<display_id>[^/]+)/'
-
- _TEST = {
- 'url': 'http://www.kontrtube.ru/videos/2678/nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag/',
- 'md5': '975a991a4926c9a85f383a736a2e6b80',
- 'info_dict': {
- 'id': '2678',
- 'display_id': 'nad-olimpiyskoy-derevney-v-sochi-podnyat-rossiyskiy-flag',
- 'ext': 'mp4',
- 'title': 'Над олимпийской деревней в Сочи поднят российский флаг',
- 'description': 'md5:80edc4c613d5887ae8ccf1d59432be41',
- 'thumbnail': 'http://www.kontrtube.ru/contents/videos_screenshots/2000/2678/preview.mp4.jpg',
- 'duration': 270,
- }
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(
- url, display_id, 'Downloading page')
-
- video_url = self._search_regex(
- r"video_url\s*:\s*'(.+?)/?',", webpage, 'video URL')
- thumbnail = self._search_regex(
- r"preview_url\s*:\s*'(.+?)/?',", webpage, 'thumbnail', fatal=False)
- title = self._html_search_regex(
- r'(?s)<h2>(.+?)</h2>', webpage, 'title')
- description = self._html_search_meta(
- 'description', webpage, 'description')
-
- duration = self._search_regex(
- r'Длительность: <em>([^<]+)</em>', webpage, 'duration', fatal=False)
- if duration:
- duration = parse_duration(duration.replace('мин', 'min').replace('сек', 'sec'))
-
- view_count = self._search_regex(
- r'Просмотров: <em>([^<]+)</em>',
- webpage, 'view count', fatal=False)
- if view_count:
- view_count = int_or_none(view_count.replace(' ', ''))
-
- comment_count = int_or_none(self._search_regex(
-            r'Комментарии \((\d+)\)<', webpage, 'comment count', fatal=False))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'url': video_url,
- 'thumbnail': thumbnail,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'view_count': view_count,
- 'comment_count': comment_count,
- }
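
Two localization details in the KontrTube code above are easy to miss: Russian duration units are mapped onto English ones so a generic duration parser can handle them, and view counts use spaces as thousands separators. A self-contained sketch of both (the parser below is a simplified stand-in for youtube-dl's parse_duration):

import re

def parse_duration_ru(text):
    # Map Russian unit labels to English, then parse 'Xmin Ysec'.
    text = text.replace('мин', 'min').replace('сек', 'sec')
    mobj = re.match(r'\s*(?:(\d+)\s*min)?\s*(?:(\d+)\s*sec)?\s*$', text)
    if not mobj or not any(mobj.groups()):
        return None
    return int(mobj.group(1) or 0) * 60 + int(mobj.group(2) or 0)

assert parse_duration_ru('4 мин 30 сек') == 270
# 'Просмотров: 12 345' -> 12345 (space-grouped digits)
assert int('12 345'.replace(' ', '')) == 12345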
diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py
deleted file mode 100644
index 6373268c4..000000000
--- a/youtube_dl/extractor/la7.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- js_to_json,
- smuggle_url,
-)
-
-
-class LA7IE(InfoExtractor):
- IE_NAME = 'la7.it'
- _VALID_URL = r'''(?x)(https?://)?(?:
- (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
- tg\.la7\.it/repliche-tgla7\?id=
- )(?P<id>.+)'''
-
- _TESTS = [{
- # 'src' is a plain URL
- 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
- 'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
- 'info_dict': {
- 'id': 'inccool8-02-10-2015-163722',
- 'ext': 'mp4',
- 'title': 'Inc.Cool8',
- 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
- 'thumbnail': 're:^https?://.*',
- 'uploader_id': 'kdla7pillole@iltrovatore.it',
- 'timestamp': 1443814869,
- 'upload_date': '20151002',
- },
- }, {
- # 'src' is a dictionary
- 'url': 'http://tg.la7.it/repliche-tgla7?id=189080',
- 'md5': '6b0d8888d286e39870208dfeceaf456b',
- 'info_dict': {
- 'id': '189080',
- 'ext': 'mp4',
- 'title': 'TG LA7',
- },
- }, {
- 'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- player_data = self._parse_json(
- self._search_regex(
- [r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
- webpage, 'player data'),
- video_id, transform_source=js_to_json)
-
- return {
- '_type': 'url_transparent',
- 'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
- 'service_url': 'http://kdam.iltrovatore.it',
- }),
- 'id': video_id,
- 'title': player_data['title'],
- 'description': self._og_search_description(webpage, default=None),
- 'thumbnail': player_data.get('poster'),
- 'ie_key': 'Kaltura',
- }
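
The LA7 extractor never downloads media itself; it hands off to the Kaltura extractor via a 'url_transparent' result and uses smuggle_url to pass along the non-standard Kaltura service URL. A simplified standalone re-implementation of the smuggling round trip (youtube-dl's real smuggle_url uses the same fragment-based idea with a slightly different encoding):

import json
from urllib.parse import quote, unquote

def smuggle(url, data):
    # Append JSON in the URL fragment so the receiving extractor can
    # recover out-of-band data that the URL itself cannot carry.
    return url + '#__smuggle=' + quote(json.dumps(data))

def unsmuggle(smuggled):
    url, _, payload = smuggled.partition('#__smuggle=')
    return url, (json.loads(unquote(payload)) if payload else {})

u = smuggle('kaltura:103:abc', {'service_url': 'http://kdam.iltrovatore.it'})
assert unsmuggle(u) == ('kaltura:103:abc', {'service_url': 'http://kdam.iltrovatore.it'})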
diff --git a/youtube_dl/extractor/learnr.py b/youtube_dl/extractor/learnr.py
deleted file mode 100644
index 1435e090e..000000000
--- a/youtube_dl/extractor/learnr.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class LearnrIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?learnr\.pro/view/video/(?P<id>[0-9]+)'
- _TEST = {
- 'url': 'http://www.learnr.pro/view/video/51624-web-development-tutorial-for-beginners-1-how-to-build-webpages-with-html-css-javascript',
- 'md5': '3719fdf0a68397f49899e82c308a89de',
- 'info_dict': {
- 'id': '51624',
- 'ext': 'mp4',
- 'title': 'Web Development Tutorial for Beginners (#1) - How to build webpages with HTML, CSS, Javascript',
- 'description': 'md5:b36dbfa92350176cdf12b4d388485503',
- 'uploader': 'LearnCode.academy',
- 'uploader_id': 'learncodeacademy',
- 'upload_date': '20131021',
- },
- 'add_ie': ['Youtube'],
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- return {
- '_type': 'url_transparent',
- 'url': self._search_regex(
- r"videoId\s*:\s*'([^']+)'", webpage, 'youtube id'),
- 'id': video_id,
- }
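
Learnr shows the 'url_transparent' pattern at its smallest: delegate extraction to the YouTube extractor, but overlay your own fields (here the Learnr numeric id) on whatever it returns. A simplified sketch of how such a result is resolved (youtube-dl's actual resolution in YoutubeDL.process_ie_result also strips ie_key and skips None values):

def resolve_url_transparent(result, extract):
    # 'extract' stands in for running the delegated extractor on result['url'].
    if result.get('_type') != 'url_transparent':
        return result
    info = dict(extract(result['url']))
    # Fields set by the delegating extractor win over the delegate's.
    info.update({k: v for k, v in result.items() if k not in ('_type', 'url')})
    return info

fake_youtube = lambda url: {'id': url, 'ext': 'mp4', 'title': 'Web Development Tutorial'}
out = resolve_url_transparent(
    {'_type': 'url_transparent', 'url': 'youtube:abc', 'id': '51624'}, fake_youtube)
assert out['id'] == '51624' and out['title'] == 'Web Development Tutorial'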
diff --git a/youtube_dl/extractor/lecturio.py b/youtube_dl/extractor/lecturio.py
deleted file mode 100644
index 6ed7da4ab..000000000
--- a/youtube_dl/extractor/lecturio.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- clean_html,
- determine_ext,
- ExtractorError,
- float_or_none,
- int_or_none,
- str_or_none,
- url_or_none,
- urlencode_postdata,
- urljoin,
-)
-
-
-class LecturioBaseIE(InfoExtractor):
- _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/'
- _LOGIN_URL = 'https://app.lecturio.com/en/login'
- _NETRC_MACHINE = 'lecturio'
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- # Sets some cookies
- _, urlh = self._download_webpage_handle(
- self._LOGIN_URL, None, 'Downloading login popup')
-
- def is_logged(url_handle):
- return self._LOGIN_URL not in compat_str(url_handle.geturl())
-
- # Already logged in
- if is_logged(urlh):
- return
-
- login_form = {
- 'signin[email]': username,
- 'signin[password]': password,
- 'signin[remember]': 'on',
- }
-
- response, urlh = self._download_webpage_handle(
- self._LOGIN_URL, None, 'Logging in',
- data=urlencode_postdata(login_form))
-
- # Logged in successfully
- if is_logged(urlh):
- return
-
- errors = self._html_search_regex(
- r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
- 'errors', default=None)
- if errors:
- raise ExtractorError('Unable to login: %s' % errors, expected=True)
- raise ExtractorError('Unable to log in')
-
-
-class LecturioIE(LecturioBaseIE):
- _VALID_URL = r'''(?x)
- https://
- (?:
- app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
- (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
- )
- '''
- _TESTS = [{
- 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
- 'md5': '9a42cf1d8282a6311bf7211bbde26fde',
- 'info_dict': {
- 'id': '39634',
- 'ext': 'mp4',
- 'title': 'Important Concepts and Terms — Introduction to Microbiology',
- },
- 'skip': 'Requires lecturio account credentials',
- }, {
- 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
- 'only_matching': True,
- }, {
- 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
- 'only_matching': True,
- }]
-
- _CC_LANGS = {
- 'Arabic': 'ar',
- 'Bulgarian': 'bg',
- 'German': 'de',
- 'English': 'en',
- 'Spanish': 'es',
- 'Persian': 'fa',
- 'French': 'fr',
- 'Japanese': 'ja',
- 'Polish': 'pl',
- 'Pashto': 'ps',
- 'Russian': 'ru',
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- nt = mobj.group('nt') or mobj.group('nt_de')
- lecture_id = mobj.group('id')
- display_id = nt or lecture_id
- api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json'
- video = self._download_json(
- self._API_BASE_URL + api_path, display_id)
- title = video['title'].strip()
- if not lecture_id:
- pid = video.get('productId') or video.get('uid')
- if pid:
- spid = pid.split('_')
- if len(spid) == 2:
- lecture_id = spid[1]
-
- formats = []
- for format_ in video['content']['media']:
- if not isinstance(format_, dict):
- continue
- file_ = format_.get('file')
- if not file_:
- continue
- ext = determine_ext(file_)
- if ext == 'smil':
- # smil contains only broken RTMP formats anyway
- continue
- file_url = url_or_none(file_)
- if not file_url:
- continue
- label = str_or_none(format_.get('label'))
- filesize = int_or_none(format_.get('fileSize'))
- f = {
- 'url': file_url,
- 'format_id': label,
- 'filesize': float_or_none(filesize, invscale=1000)
- }
- if label:
- mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label)
- if mobj:
- f.update({
- 'format_id': mobj.group(2),
- 'height': int(mobj.group(1)),
- })
- formats.append(f)
- self._sort_formats(formats)
-
- subtitles = {}
- automatic_captions = {}
- captions = video.get('captions') or []
- for cc in captions:
- cc_url = cc.get('url')
- if not cc_url:
- continue
- cc_label = cc.get('translatedCode')
- lang = cc.get('languageCode') or self._search_regex(
- r'/([a-z]{2})_', cc_url, 'lang',
- default=cc_label.split()[0] if cc_label else 'en')
- original_lang = self._search_regex(
- r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
- default=None)
- sub_dict = (automatic_captions
- if (cc_label and 'auto-translated' in cc_label) or original_lang
- else subtitles)
- sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
- 'url': cc_url,
- })
-
- return {
- 'id': lecture_id or nt,
- 'title': title,
- 'formats': formats,
- 'subtitles': subtitles,
- 'automatic_captions': automatic_captions,
- }
-
-
-class LecturioCourseIE(LecturioBaseIE):
- _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
- _TESTS = [{
- 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
- 'info_dict': {
- 'id': 'microbiology-introduction',
- 'title': 'Microbiology: Introduction',
- 'description': 'md5:13da8500c25880c6016ae1e6d78c386a',
- },
- 'playlist_count': 45,
- 'skip': 'Requires lecturio account credentials',
- }, {
- 'url': 'https://app.lecturio.com/#/course/c/6434',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- nt, course_id = re.match(self._VALID_URL, url).groups()
- display_id = nt or course_id
- api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
- course = self._download_json(
- self._API_BASE_URL + api_path, display_id)
- entries = []
- for lecture in course.get('lectures', []):
- lecture_id = str_or_none(lecture.get('id'))
- lecture_url = lecture.get('url')
- if lecture_url:
- lecture_url = urljoin(url, lecture_url)
- else:
- lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id)
- entries.append(self.url_result(
- lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
- return self.playlist_result(
- entries, display_id, course.get('title'),
- clean_html(course.get('description')))
-
-
-class LecturioDeCourseIE(LecturioBaseIE):
- _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
- _TEST = {
- 'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
- 'only_matching': True,
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- entries = []
- for mobj in re.finditer(
- r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
- webpage):
- lecture_url = urljoin(url, mobj.group('url'))
- lecture_id = mobj.group('id')
- entries.append(self.url_result(
- lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
-
- title = self._search_regex(
- r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
-
- return self.playlist_result(entries, display_id, title)
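
The login flow in LecturioBaseIE above is the classic cookie-based form POST: fetch the login page once to prime session cookies, POST the credentials, and judge success purely by whether the final URL has been redirected away from the login endpoint. A compressed standalone sketch of that control flow with urllib (form field names as in the code above; error reporting elided):

import urllib.parse
import urllib.request

LOGIN_URL = 'https://app.lecturio.com/en/login'

opener = urllib.request.build_opener(
    urllib.request.HTTPCookieProcessor())  # keeps session cookies across requests

def login(username, password):
    # Already logged in if the login URL immediately redirects elsewhere.
    if LOGIN_URL not in opener.open(LOGIN_URL).geturl():
        return True
    data = urllib.parse.urlencode({
        'signin[email]': username,
        'signin[password]': password,
        'signin[remember]': 'on',
    }).encode()
    # Success == the POST response lands somewhere other than the login page.
    return LOGIN_URL not in opener.open(LOGIN_URL, data=data).geturl()

# usage: login('user@example.com', 'hunter2')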
diff --git a/youtube_dl/extractor/lego.py b/youtube_dl/extractor/lego.py
deleted file mode 100644
index b312e77f1..000000000
--- a/youtube_dl/extractor/lego.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- unescapeHTML,
- parse_duration,
- get_element_by_class,
-)
-
-
-class LEGOIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[^/]+)/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]+)'
- _TESTS = [{
- 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
- 'md5': 'f34468f176cfd76488767fc162c405fa',
- 'info_dict': {
- 'id': '55492d823b1b4d5e985787fa8c2973b1',
- 'ext': 'mp4',
- 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
- 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
- },
- }, {
- # geo-restricted but the contentUrl contains a valid url
- 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
- 'md5': '4c3fec48a12e40c6e5995abc3d36cc2e',
- 'info_dict': {
- 'id': '13bdc2299ab24d9685701a915b3d71e7',
- 'ext': 'mp4',
- 'title': 'Aflevering 20 - Helden van het koninkrijk',
- 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
- },
- }, {
- # special characters in title
- 'url': 'http://www.lego.com/en-us/starwars/videos/lego-star-wars-force-surprise-9685ee9d12e84ff38e84b4e3d0db533d',
- 'info_dict': {
- 'id': '9685ee9d12e84ff38e84b4e3d0db533d',
- 'ext': 'mp4',
- 'title': 'Force Surprise – LEGO® Star Wars™ Microfighters',
- 'description': 'md5:9c673c96ce6f6271b88563fe9dc56de3',
- },
- 'params': {
- 'skip_download': True,
- },
- }]
- _BITRATES = [256, 512, 1024, 1536, 2560]
-
- def _real_extract(self, url):
- locale, video_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(url, video_id)
- title = get_element_by_class('video-header', webpage).strip()
- progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/'
- streaming_base = 'http://legoprod-f.akamaihd.net/'
- content_url = self._html_search_meta('contentUrl', webpage)
- path = self._search_regex(
- r'(?:https?:)?//[^/]+/(?:[iz]/s/)?public/(.+)_[0-9,]+\.(?:mp4|webm)',
- content_url, 'video path', default=None)
- if not path:
- player_url = self._proto_relative_url(self._search_regex(
- r'<iframe[^>]+src="((?:https?)?//(?:www\.)?lego\.com/[^/]+/mediaplayer/video/[^"]+)',
- webpage, 'player url', default=None))
- if not player_url:
- base_url = self._proto_relative_url(self._search_regex(
- r'data-baseurl="([^"]+)"', webpage, 'base url',
- default='http://www.lego.com/%s/mediaplayer/video/' % locale))
- player_url = base_url + video_id
- player_webpage = self._download_webpage(player_url, video_id)
- video_data = self._parse_json(unescapeHTML(self._search_regex(
- r"video='([^']+)'", player_webpage, 'video data')), video_id)
- progressive_base = self._search_regex(
- r'data-video-progressive-url="([^"]+)"',
- player_webpage, 'progressive base', default='https://lc-mediaplayerns-live-s.legocdn.com/')
- streaming_base = self._search_regex(
- r'data-video-streaming-url="([^"]+)"',
- player_webpage, 'streaming base', default='http://legoprod-f.akamaihd.net/')
- item_id = video_data['ItemId']
-
- net_storage_path = video_data.get('NetStoragePath') or '/'.join([item_id[:2], item_id[2:4]])
- base_path = '_'.join([item_id, video_data['VideoId'], video_data['Locale'], compat_str(video_data['VideoVersion'])])
- path = '/'.join([net_storage_path, base_path])
- streaming_path = ','.join(map(lambda bitrate: compat_str(bitrate), self._BITRATES))
-
- formats = self._extract_akamai_formats(
- '%si/s/public/%s_,%s,.mp4.csmil/master.m3u8' % (streaming_base, path, streaming_path), video_id)
- m3u8_formats = list(filter(
- lambda f: f.get('protocol') == 'm3u8_native' and f.get('vcodec') != 'none',
- formats))
- if len(m3u8_formats) == len(self._BITRATES):
- self._sort_formats(m3u8_formats)
- for bitrate, m3u8_format in zip(self._BITRATES, m3u8_formats):
- progressive_base_url = '%spublic/%s_%d.' % (progressive_base, path, bitrate)
- mp4_f = m3u8_format.copy()
- mp4_f.update({
- 'url': progressive_base_url + 'mp4',
- 'format_id': m3u8_format['format_id'].replace('hls', 'mp4'),
- 'protocol': 'http',
- })
- web_f = {
- 'url': progressive_base_url + 'webm',
- 'format_id': m3u8_format['format_id'].replace('hls', 'webm'),
- 'width': m3u8_format['width'],
- 'height': m3u8_format['height'],
- 'tbr': m3u8_format.get('tbr'),
- 'ext': 'webm',
- }
- formats.extend([web_f, mp4_f])
- else:
- for bitrate in self._BITRATES:
- for ext in ('webm', 'mp4'):
- formats.append({
- 'format_id': '%s-%s' % (ext, bitrate),
- 'url': '%spublic/%s_%d.%s' % (progressive_base, path, bitrate, ext),
- 'tbr': bitrate,
- 'ext': ext,
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': self._html_search_meta('description', webpage),
- 'thumbnail': self._html_search_meta('thumbnail', webpage),
- 'duration': parse_duration(self._html_search_meta('duration', webpage)),
- 'formats': formats,
- }
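
The notable trick in the LEGO extractor is format mirroring: the HLS master exposes one variant per known bitrate, and each variant's width/height/tbr are copied onto predictable progressive MP4/WebM URLs built from the same path. A minimal sketch of that derivation (URL template as in the code above; the variant list is fabricated):

BITRATES = [256, 512, 1024, 1536, 2560]
progressive_base = 'https://lc-mediaplayerns-live-s.legocdn.com/'
path = 'aa/bb/itemid_videoid_locale_1'  # fabricated example path

hls_variants = [  # pretend these were parsed from the m3u8, sorted by bitrate
    {'format_id': 'hls-%d' % tbr, 'width': w, 'height': h, 'tbr': tbr}
    for tbr, (w, h) in zip(
        BITRATES, [(426, 240), (640, 360), (854, 480), (1280, 720), (1920, 1080)])]

formats = []
for bitrate, variant in zip(BITRATES, hls_variants):
    base = '%spublic/%s_%d.' % (progressive_base, path, bitrate)
    for ext in ('mp4', 'webm'):
        f = dict(variant)  # inherit width/height/tbr from the HLS variant
        f.update({
            'url': base + ext,
            'format_id': variant['format_id'].replace('hls', ext),
            'ext': ext,
        })
        formats.append(f)

assert formats[0]['url'].endswith('_256.mp4')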
diff --git a/youtube_dl/extractor/limelight.py b/youtube_dl/extractor/limelight.py
deleted file mode 100644
index 729d8de50..000000000
--- a/youtube_dl/extractor/limelight.py
+++ /dev/null
@@ -1,377 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_HTTPError
-from ..utils import (
- determine_ext,
- float_or_none,
- int_or_none,
- smuggle_url,
- try_get,
- unsmuggle_url,
- ExtractorError,
-)
-
-
-class LimelightBaseIE(InfoExtractor):
- _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
- _API_URL = 'http://api.video.limelight.com/rest/organizations/%s/%s/%s/%s.json'
-
- @classmethod
- def _extract_urls(cls, webpage, source_url):
- lm = {
- 'Media': 'media',
- 'Channel': 'channel',
- 'ChannelList': 'channel_list',
- }
-
- def smuggle(url):
- return smuggle_url(url, {'source_url': source_url})
-
- entries = []
- for kind, video_id in re.findall(
- r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
- webpage):
- entries.append(cls.url_result(
- smuggle('limelight:%s:%s' % (lm[kind], video_id)),
- 'Limelight%s' % kind, video_id))
- for mobj in re.finditer(
- # As per [1], the class attribute should be exactly equal to
- # LimelightEmbeddedPlayerFlash, but numerous examples have been
- # seen that don't exactly match it (e.g. [2]).
- # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
- # 2. http://www.sedona.com/FacilitatorTraining2017
- r'''(?sx)
- <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
- <param[^>]+
- name=(["\'])flashVars\2[^>]+
- value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
- ''', webpage):
- kind, video_id = mobj.group('kind'), mobj.group('id')
- entries.append(cls.url_result(
- smuggle('limelight:%s:%s' % (kind, video_id)),
- 'Limelight%s' % kind.capitalize(), video_id))
- # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page)
- for video_id in re.findall(
- r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
- webpage):
- entries.append(cls.url_result(
- smuggle('limelight:media:%s' % video_id),
- LimelightMediaIE.ie_key(), video_id))
- return entries
-
- def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
- headers = {}
- if referer:
- headers['Referer'] = referer
- try:
- return self._download_json(
- self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
- item_id, 'Downloading PlaylistService %s JSON' % method, fatal=fatal, headers=headers)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
- if error == 'CountryDisabled':
- self.raise_geo_restricted()
- raise ExtractorError(error, expected=True)
- raise
-
- def _call_api(self, organization_id, item_id, method):
- return self._download_json(
- self._API_URL % (organization_id, self._API_PATH, item_id, method),
- item_id, 'Downloading API %s JSON' % method)
-
- def _extract(self, item_id, pc_method, mobile_method, meta_method, referer=None):
- pc = self._call_playlist_service(item_id, pc_method, referer=referer)
- metadata = self._call_api(pc['orgId'], item_id, meta_method)
- mobile = self._call_playlist_service(item_id, mobile_method, fatal=False, referer=referer)
- return pc, mobile, metadata
-
- def _extract_info(self, streams, mobile_urls, properties):
- video_id = properties['media_id']
- formats = []
- urls = []
- for stream in streams:
- stream_url = stream.get('url')
- if not stream_url or stream.get('drmProtected') or stream_url in urls:
- continue
- urls.append(stream_url)
- ext = determine_ext(stream_url)
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- stream_url, video_id, f4m_id='hds', fatal=False))
- else:
- fmt = {
- 'url': stream_url,
- 'abr': float_or_none(stream.get('audioBitRate')),
- 'fps': float_or_none(stream.get('videoFrameRate')),
- 'ext': ext,
- }
- width = int_or_none(stream.get('videoWidthInPixels'))
- height = int_or_none(stream.get('videoHeightInPixels'))
- vbr = float_or_none(stream.get('videoBitRate'))
- if width or height or vbr:
- fmt.update({
- 'width': width,
- 'height': height,
- 'vbr': vbr,
- })
- else:
- fmt['vcodec'] = 'none'
- rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
- if rtmp:
- format_id = 'rtmp'
- if stream.get('videoBitRate'):
- format_id += '-%d' % int_or_none(stream['videoBitRate'])
- http_format_id = format_id.replace('rtmp', 'http')
-
- CDN_HOSTS = (
- ('delvenetworks.com', 'cpl.delvenetworks.com'),
- ('video.llnw.net', 's2.content.video.llnw.net'),
- )
- for cdn_host, http_host in CDN_HOSTS:
- if cdn_host not in rtmp.group('host').lower():
- continue
- http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
- urls.append(http_url)
- if self._is_valid_url(http_url, video_id, http_format_id):
- http_fmt = fmt.copy()
- http_fmt.update({
- 'url': http_url,
- 'format_id': http_format_id,
- })
- formats.append(http_fmt)
- break
-
- fmt.update({
- 'url': rtmp.group('url'),
- 'play_path': rtmp.group('playpath'),
- 'app': rtmp.group('app'),
- 'ext': 'flv',
- 'format_id': format_id,
- })
- formats.append(fmt)
-
- for mobile_url in mobile_urls:
- media_url = mobile_url.get('mobileUrl')
- format_id = mobile_url.get('targetMediaPlatform')
- if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
- continue
- urls.append(media_url)
- ext = determine_ext(media_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- media_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- media_url, video_id, f4m_id=format_id, fatal=False))
- else:
- formats.append({
- 'url': media_url,
- 'format_id': format_id,
- 'preference': -1,
- 'ext': ext,
- })
-
- self._sort_formats(formats)
-
- title = properties['title']
- description = properties.get('description')
- timestamp = int_or_none(properties.get('publish_date') or properties.get('create_date'))
- duration = float_or_none(properties.get('duration_in_milliseconds'), 1000)
- filesize = int_or_none(properties.get('total_storage_in_bytes'))
- categories = [properties.get('category')]
- tags = properties.get('tags', [])
- thumbnails = [{
- 'url': thumbnail['url'],
- 'width': int_or_none(thumbnail.get('width')),
- 'height': int_or_none(thumbnail.get('height')),
- } for thumbnail in properties.get('thumbnails', []) if thumbnail.get('url')]
-
- subtitles = {}
- for caption in properties.get('captions', []):
- lang = caption.get('language_code')
- subtitles_url = caption.get('url')
- if lang and subtitles_url:
- subtitles.setdefault(lang, []).append({
- 'url': subtitles_url,
- })
- closed_captions_url = properties.get('closed_captions_url')
- if closed_captions_url:
- subtitles.setdefault('en', []).append({
- 'url': closed_captions_url,
- 'ext': 'ttml',
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'formats': formats,
- 'timestamp': timestamp,
- 'duration': duration,
- 'filesize': filesize,
- 'categories': categories,
- 'tags': tags,
- 'thumbnails': thumbnails,
- 'subtitles': subtitles,
- }
-
- def _extract_info_helper(self, pc, mobile, i, metadata):
- return self._extract_info(
- try_get(pc, lambda x: x['playlistItems'][i]['streams'], list) or [],
- try_get(mobile, lambda x: x['mediaList'][i]['mobileUrls'], list) or [],
- metadata)
-
-
-class LimelightMediaIE(LimelightBaseIE):
- IE_NAME = 'limelight'
- _VALID_URL = r'''(?x)
- (?:
- limelight:media:|
- https?://
- (?:
- link\.videoplatform\.limelight\.com/media/|
- assets\.delvenetworks\.com/player/loader\.swf
- )
- \?.*?\bmediaId=
- )
- (?P<id>[a-z0-9]{32})
- '''
- _TESTS = [{
- 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
- 'info_dict': {
- 'id': '3ffd040b522b4485b6d84effc750cd86',
- 'ext': 'mp4',
- 'title': 'HaP and the HB Prince Trailer',
- 'description': 'md5:8005b944181778e313d95c1237ddb640',
- 'thumbnail': r're:^https?://.*\.jpeg$',
- 'duration': 144.23,
- 'timestamp': 1244136834,
- 'upload_date': '20090604',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- # video with subtitles
- 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
- 'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
- 'info_dict': {
- 'id': 'a3e00274d4564ec4a9b29b9466432335',
- 'ext': 'mp4',
- 'title': '3Play Media Overview Video',
- 'thumbnail': r're:^https?://.*\.jpeg$',
- 'duration': 78.101,
- 'timestamp': 1338929955,
- 'upload_date': '20120605',
- 'subtitles': 'mincount:9',
- },
- }, {
- 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
- 'only_matching': True,
- }]
- _PLAYLIST_SERVICE_PATH = 'media'
- _API_PATH = 'media'
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- video_id = self._match_id(url)
- self._initialize_geo_bypass({
- 'countries': smuggled_data.get('geo_countries'),
- })
-
- pc, mobile, metadata = self._extract(
- video_id, 'getPlaylistByMediaId',
- 'getMobilePlaylistByMediaId', 'properties',
- smuggled_data.get('source_url'))
-
- return self._extract_info_helper(pc, mobile, 0, metadata)
-
-
-class LimelightChannelIE(LimelightBaseIE):
- IE_NAME = 'limelight:channel'
- _VALID_URL = r'''(?x)
- (?:
- limelight:channel:|
- https?://
- (?:
- link\.videoplatform\.limelight\.com/media/|
- assets\.delvenetworks\.com/player/loader\.swf
- )
- \?.*?\bchannelId=
- )
- (?P<id>[a-z0-9]{32})
- '''
- _TESTS = [{
- 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
- 'info_dict': {
- 'id': 'ab6a524c379342f9b23642917020c082',
- 'title': 'Javascript Sample Code',
- },
- 'playlist_mincount': 3,
- }, {
- 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
- 'only_matching': True,
- }]
- _PLAYLIST_SERVICE_PATH = 'channel'
- _API_PATH = 'channels'
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- channel_id = self._match_id(url)
-
- pc, mobile, medias = self._extract(
- channel_id, 'getPlaylistByChannelId',
- 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
- 'media', smuggled_data.get('source_url'))
-
- entries = [
- self._extract_info_helper(pc, mobile, i, medias['media_list'][i])
- for i in range(len(medias['media_list']))]
-
- return self.playlist_result(entries, channel_id, pc['title'])
-
-
-class LimelightChannelListIE(LimelightBaseIE):
- IE_NAME = 'limelight:channel_list'
- _VALID_URL = r'''(?x)
- (?:
- limelight:channel_list:|
- https?://
- (?:
- link\.videoplatform\.limelight\.com/media/|
- assets\.delvenetworks\.com/player/loader\.swf
- )
- \?.*?\bchannelListId=
- )
- (?P<id>[a-z0-9]{32})
- '''
- _TESTS = [{
- 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
- 'info_dict': {
- 'id': '301b117890c4465c8179ede21fd92e2b',
- 'title': 'Website - Hero Player',
- },
- 'playlist_mincount': 2,
- }, {
- 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
- 'only_matching': True,
- }]
- _PLAYLIST_SERVICE_PATH = 'channel_list'
-
- def _real_extract(self, url):
- channel_list_id = self._match_id(url)
-
- channel_list = self._call_playlist_service(channel_list_id, 'getMobileChannelListById')
-
- entries = [
- self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
- for channel in channel_list['channelList']]
-
- return self.playlist_result(entries, channel_list_id, channel_list['title'])
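
One detail of _extract_info above deserves a note: Limelight's RTMP streams mirror the same file over plain HTTP on a sibling CDN host, so the extractor rewrites rtmp://host/app/mp4:path into an HTTP candidate and keeps it only if the URL actually answers. A standalone sketch of just the rewrite (host table copied from the code; the validity check over the network is omitted):

import re

CDN_HOSTS = (
    ('delvenetworks.com', 'cpl.delvenetworks.com'),
    ('video.llnw.net', 's2.content.video.llnw.net'),
)

def rtmp_to_http(stream_url):
    rtmp = re.search(
        r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$',
        stream_url)
    if not rtmp:
        return None
    for cdn_host, http_host in CDN_HOSTS:
        if cdn_host in rtmp.group('host').lower():
            # Drop the 'mp4:'/'mp3:' prefix from the play path.
            return 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
    return None

assert rtmp_to_http('rtmp://flv.video.llnw.net/app/mp4:folder/clip.mp4') == \
    'http://s2.content.video.llnw.net/folder/clip.mp4'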
diff --git a/youtube_dl/extractor/linuxacademy.py b/youtube_dl/extractor/linuxacademy.py
deleted file mode 100644
index a78c6556e..000000000
--- a/youtube_dl/extractor/linuxacademy.py
+++ /dev/null
@@ -1,174 +0,0 @@
-from __future__ import unicode_literals
-
-import json
-import random
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_HTTPError,
- compat_str,
-)
-from ..utils import (
- ExtractorError,
- orderedSet,
- unescapeHTML,
- urlencode_postdata,
- urljoin,
-)
-
-
-class LinuxAcademyIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?linuxacademy\.com/cp/
- (?:
- courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
- modules/view/id/(?P<course_id>\d+)
- )
- '''
- _TESTS = [{
- 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
- 'info_dict': {
- 'id': '1498-2',
- 'ext': 'mp4',
- 'title': "Introduction to the Practitioner's Brief",
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Requires Linux Academy account credentials',
- }, {
- 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
- 'only_matching': True,
- }, {
- 'url': 'https://linuxacademy.com/cp/modules/view/id/154',
- 'info_dict': {
- 'id': '154',
- 'title': 'AWS Certified Cloud Practitioner',
- 'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
- },
- 'playlist_count': 41,
- 'skip': 'Requires Linux Academy account credentials',
- }]
-
- _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
- _ORIGIN_URL = 'https://linuxacademy.com'
- _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
- _NETRC_MACHINE = 'linuxacademy'
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- def random_string():
- return ''.join([
- random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-._~')
- for _ in range(32)])
-
- webpage, urlh = self._download_webpage_handle(
- self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
- 'client_id': self._CLIENT_ID,
- 'response_type': 'token id_token',
- 'redirect_uri': self._ORIGIN_URL,
- 'scope': 'openid email user_impersonation profile',
- 'audience': self._ORIGIN_URL,
- 'state': random_string(),
- 'nonce': random_string(),
- })
-
- login_data = self._parse_json(
- self._search_regex(
- r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'login info', group='value'), None,
- transform_source=lambda x: compat_b64decode(x).decode('utf-8')
- )['extraParams']
-
- login_data.update({
- 'client_id': self._CLIENT_ID,
- 'redirect_uri': self._ORIGIN_URL,
- 'tenant': 'lacausers',
- 'connection': 'Username-Password-Authentication',
- 'username': username,
- 'password': password,
- 'sso': 'true',
- })
-
- login_state_url = compat_str(urlh.geturl())
-
- try:
- login_page = self._download_webpage(
- 'https://login.linuxacademy.com/usernamepassword/login', None,
- 'Downloading login page', data=json.dumps(login_data).encode(),
- headers={
- 'Content-Type': 'application/json',
- 'Origin': 'https://login.linuxacademy.com',
- 'Referer': login_state_url,
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
- error = self._parse_json(e.cause.read(), None)
- message = error.get('description') or error['code']
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, message), expected=True)
- raise
-
- callback_page, urlh = self._download_webpage_handle(
- 'https://login.linuxacademy.com/login/callback', None,
- 'Downloading callback page',
- data=urlencode_postdata(self._hidden_inputs(login_page)),
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'Origin': 'https://login.linuxacademy.com',
- 'Referer': login_state_url,
- })
-
- access_token = self._search_regex(
- r'access_token=([^=&]+)', compat_str(urlh.geturl()),
- 'access token')
-
- self._download_webpage(
- 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
- % access_token, None, 'Downloading token validation page')
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
- item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)
-
- webpage = self._download_webpage(url, item_id)
-
- # course path
- if course_id:
- entries = [
- self.url_result(
- urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
- for lesson_url in orderedSet(re.findall(
- r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
- webpage))]
- title = unescapeHTML(self._html_search_regex(
- (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
- r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
- webpage, 'title', default=None, group='value'))
- description = unescapeHTML(self._html_search_regex(
- r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
- webpage, 'description', default=None, group='value'))
- return self.playlist_result(entries, course_id, title, description)
-
- # single video path
- info = self._extract_jwplayer_data(
- webpage, item_id, require_title=False, m3u8_id='hls')
- title = self._search_regex(
- (r'>Lecture\s*:\s*(?P<value>[^<]+)',
- r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
- 'title', group='value')
- info.update({
- 'id': item_id,
- 'title': title,
- })
- return info
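
The Linux Academy login is an Auth0 implicit-grant flow, so after the callback POST the access token arrives in the redirect URL itself (its fragment), not in a response body. A tiny standalone sketch of that final step (the redirect URL is fabricated):

import re
from urllib.parse import parse_qs, urlparse

def token_from_redirect(redirect_url):
    # Implicit grant: the token rides in the fragment of the redirect URL.
    token = parse_qs(urlparse(redirect_url).fragment).get('access_token', [None])[0]
    if token:
        return token
    # Fallback mirroring the loose regex used in the extractor above.
    mobj = re.search(r'access_token=([^=&]+)', redirect_url)
    return mobj.group(1) if mobj else None

url = 'https://linuxacademy.com/#access_token=abc123&token_type=Bearer'
assert token_from_redirect(url) == 'abc123'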
diff --git a/youtube_dl/extractor/lnkgo.py b/youtube_dl/extractor/lnkgo.py
deleted file mode 100644
index cfec0d3d0..000000000
--- a/youtube_dl/extractor/lnkgo.py
+++ /dev/null
@@ -1,116 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- unified_strdate,
-)
-
-
-class LnkGoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?lnkgo\.(?:alfa\.)?lt/visi-video/(?P<show>[^/]+)/ziurek-(?P<id>[A-Za-z0-9-]+)'
- _TESTS = [{
- 'url': 'http://lnkgo.alfa.lt/visi-video/yra-kaip-yra/ziurek-yra-kaip-yra-162',
- 'info_dict': {
- 'id': '46712',
- 'ext': 'mp4',
- 'title': 'Yra kaip yra',
- 'upload_date': '20150107',
- 'description': 'md5:d82a5e36b775b7048617f263a0e3475e',
- 'age_limit': 7,
- 'duration': 3019,
- 'thumbnail': r're:^https?://.*\.jpg$'
- },
- 'params': {
- 'skip_download': True, # HLS download
- },
- }, {
- 'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
- 'info_dict': {
- 'id': '47289',
- 'ext': 'mp4',
- 'title': 'Nėrdas: Kompiuterio Valymas',
- 'upload_date': '20150113',
- 'description': 'md5:7352d113a242a808676ff17e69db6a69',
- 'age_limit': 18,
- 'duration': 346,
- 'thumbnail': r're:^https?://.*\.jpg$'
- },
- 'params': {
- 'skip_download': True, # HLS download
- },
- }, {
- 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
- 'only_matching': True,
- }]
- _AGE_LIMITS = {
- 'N-7': 7,
- 'N-14': 14,
- 'S': 18,
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(
- url, display_id, 'Downloading player webpage')
-
- video_id = self._search_regex(
- r'data-ep="([^"]+)"', webpage, 'video ID')
- title = self._og_search_title(webpage)
- description = self._og_search_description(webpage)
- upload_date = unified_strdate(self._search_regex(
- r'class="[^"]*meta-item[^"]*air-time[^"]*">.*?<strong>([^<]+)</strong>', webpage, 'upload date', fatal=False))
-
- thumbnail_w = int_or_none(
- self._og_search_property('image:width', webpage, 'thumbnail width', fatal=False))
- thumbnail_h = int_or_none(
- self._og_search_property('image:height', webpage, 'thumbnail height', fatal=False))
- thumbnail = {
- 'url': self._og_search_thumbnail(webpage),
- }
- if thumbnail_w and thumbnail_h:
- thumbnail.update({
- 'width': thumbnail_w,
- 'height': thumbnail_h,
- })
-
- config = self._parse_json(self._search_regex(
- r'episodePlayer\((\{.*?\}),\s*\{', webpage, 'sources'), video_id)
-
- if config.get('pGeo'):
- self.report_warning(
- 'This content might not be available in your country due to copyright reasons')
-
- formats = [{
- 'format_id': 'hls',
- 'ext': 'mp4',
- 'url': config['EpisodeVideoLink_HLS'],
- }]
-
- m = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', config['EpisodeVideoLink'])
- if m:
- formats.append({
- 'format_id': 'rtmp',
- 'ext': 'flv',
- 'url': m.group('url'),
- 'play_path': m.group('play_path'),
- 'page_url': url,
- })
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'formats': formats,
- 'thumbnails': [thumbnail],
- 'duration': int_or_none(config.get('VideoTime')),
- 'description': description,
- 'age_limit': self._AGE_LIMITS.get(config.get('PGRating'), 0),
- 'upload_date': upload_date,
- }
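
LnkGo's RTMP branch shows the three pieces youtube-dl's RTMP downloader actually consumes: the server URL including the application, the application name, and the play path within it. A standalone sketch of the split performed above (the sample URL is fabricated):

import re

def split_rtmp(rtmp_url):
    mobj = re.search(
        r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<play_path>.+)$', rtmp_url)
    if not mobj:
        return None
    return {
        'url': mobj.group('url'),              # rtmp://server/app
        'app': mobj.group('app'),              # application name
        'play_path': mobj.group('play_path'),  # stream within the app
    }

assert split_rtmp('rtmp://srv.example.lt/vod/mp4:shows/ep162.mp4') == {
    'url': 'rtmp://srv.example.lt/vod',
    'app': 'vod',
    'play_path': 'mp4:shows/ep162.mp4',
}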
diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py
deleted file mode 100644
index 43db9929c..000000000
--- a/youtube_dl/extractor/macgamestore.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class MacGameStoreIE(InfoExtractor):
- IE_NAME = 'macgamestore'
- IE_DESC = 'MacGameStore trailers'
- _VALID_URL = r'https?://(?:www\.)?macgamestore\.com/mediaviewer\.php\?trailer=(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450',
- 'md5': '8649b8ea684b6666b4c5be736ecddc61',
- 'info_dict': {
- 'id': '2450',
- 'ext': 'm4v',
- 'title': 'Crow',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(
- url, video_id, 'Downloading trailer page')
-
- if '>Missing Media<' in webpage:
- raise ExtractorError(
- 'Trailer %s does not exist' % video_id, expected=True)
-
- video_title = self._html_search_regex(
- r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title')
-
- video_url = self._html_search_regex(
- r'(?s)<div\s+id="video-player".*?href="([^"]+)"\s*>',
- webpage, 'video URL')
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': video_title
- }
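
Small as it is, MacGameStoreIE demonstrates a convention used throughout the codebase: sentinel markup in the page ('>Missing Media<') becomes an error flagged as expected, i.e. a clean user-facing message rather than a reportable crash. A standalone sketch of the idea (ExpectedError stands in for ExtractorError(..., expected=True)):

class ExpectedError(Exception):
    """Stand-in for ExtractorError(..., expected=True)."""

def check_availability(webpage, video_id):
    # Sentinel markup means the trailer is gone; report it cleanly.
    if '>Missing Media<' in webpage:
        raise ExpectedError('Trailer %s does not exist' % video_id)

try:
    check_availability('<div>Missing Media</div>', '2450')
except ExpectedError as e:
    assert str(e) == 'Trailer 2450 does not exist'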
diff --git a/youtube_dl/extractor/mailru.py b/youtube_dl/extractor/mailru.py
deleted file mode 100644
index 6b0e64b7f..000000000
--- a/youtube_dl/extractor/mailru.py
+++ /dev/null
@@ -1,314 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
-import json
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-from ..utils import (
- int_or_none,
- parse_duration,
- remove_end,
- try_get,
-)
-
-
-class MailRuIE(InfoExtractor):
- IE_NAME = 'mailru'
- IE_DESC = 'Видео@Mail.Ru'
- _VALID_URL = r'''(?x)
- https?://
- (?:(?:www|m)\.)?my\.mail\.ru/
- (?:
- video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
- (?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
- (?:video/embed|\+/video/meta)/(?P<metaid>\d+)
- )
- '''
- _TESTS = [
- {
- 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
- 'md5': 'dea205f03120046894db4ebb6159879a',
- 'info_dict': {
- 'id': '46301138_76',
- 'ext': 'mp4',
- 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
- 'timestamp': 1393235077,
- 'upload_date': '20140224',
- 'uploader': 'sonypicturesrus',
- 'uploader_id': 'sonypicturesrus@mail.ru',
- 'duration': 184,
- },
- 'skip': 'Not accessible from Travis CI server',
- },
- {
- 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
- 'md5': '00a91a58c3402204dcced523777b475f',
- 'info_dict': {
- 'id': '46843144_1263',
- 'ext': 'mp4',
- 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
- 'timestamp': 1397039888,
- 'upload_date': '20140409',
- 'uploader': 'hitech',
- 'uploader_id': 'hitech@corp.mail.ru',
- 'duration': 245,
- },
- 'skip': 'Not accessible from Travis CI server',
- },
- {
- # only available via metaUrl API
- 'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
- 'md5': '3b26d2491c6949d031a32b96bd97c096',
- 'info_dict': {
- 'id': '56664382_502',
- 'ext': 'mp4',
- 'title': ':8336',
- 'timestamp': 1449094163,
- 'upload_date': '20151202',
- 'uploader': '720pizle@mail.ru',
- 'uploader_id': '720pizle@mail.ru',
- 'duration': 6001,
- },
- 'skip': 'Not accessible from Travis CI server',
- },
- {
- 'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
- 'only_matching': True,
- },
- {
- 'url': 'https://my.mail.ru/video/embed/7949340477499637815',
- 'only_matching': True,
- },
- {
- 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815',
- 'only_matching': True,
- }
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- meta_id = mobj.group('metaid')
-
- video_id = None
- if meta_id:
- meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id
- else:
- video_id = mobj.group('idv1')
- if not video_id:
- video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
- webpage = self._download_webpage(url, video_id)
- page_config = self._parse_json(self._search_regex(
- r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
- webpage, 'page config', default='{}'), video_id, fatal=False)
- if page_config:
- meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
- else:
- meta_url = None
-
- video_data = None
- if meta_url:
- video_data = self._download_json(
- meta_url, video_id or meta_id, 'Downloading video meta JSON',
- fatal=not video_id)
-
- # Fall back to the old approach
- if not video_data:
- video_data = self._download_json(
- 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
- video_id, 'Downloading video JSON')
-
- formats = []
- for f in video_data['videos']:
- video_url = f.get('url')
- if not video_url:
- continue
- format_id = f.get('key')
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- 'height': height,
- })
- self._sort_formats(formats)
-
- meta_data = video_data['meta']
- title = remove_end(meta_data['title'], '.mp4')
-
- author = video_data.get('author') or {}
- uploader = author.get('name')
- uploader_id = author.get('id') or author.get('email')
- view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
-
- acc_id = meta_data.get('accId')
- item_id = meta_data.get('itemId')
- content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
-
- thumbnail = meta_data.get('poster')
- duration = int_or_none(meta_data.get('duration'))
- timestamp = int_or_none(meta_data.get('timestamp'))
-
- return {
- 'id': content_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'duration': duration,
- 'view_count': view_count,
- 'formats': formats,
- }
-
-
-class MailRuMusicSearchBaseIE(InfoExtractor):
- def _search(self, query, url, audio_id, limit=100, offset=0):
- search = self._download_json(
- 'https://my.mail.ru/cgi-bin/my/ajax', audio_id,
- 'Downloading songs JSON page %d' % (offset // limit + 1),
- headers={
- 'Referer': url,
- 'X-Requested-With': 'XMLHttpRequest',
- }, query={
- 'xemail': '',
- 'ajax_call': '1',
- 'func_name': 'music.search',
- 'mna': '',
- 'mnb': '',
- 'arg_query': query,
- 'arg_extended': '1',
- 'arg_search_params': json.dumps({
- 'music': {
- 'limit': limit,
- 'offset': offset,
- },
- }),
- 'arg_limit': limit,
- 'arg_offset': offset,
- })
- return next(e for e in search if isinstance(e, dict))
-
- @staticmethod
- def _extract_track(t, fatal=True):
- audio_url = t['URL'] if fatal else t.get('URL')
- if not audio_url:
- return
-
- audio_id = t['File'] if fatal else t.get('File')
- if not audio_id:
- return
-
- thumbnail = t.get('AlbumCoverURL') or t.get('FiledAlbumCover')
- uploader = t.get('OwnerName') or t.get('OwnerName_Text_HTML')
- uploader_id = t.get('UploaderID')
- duration = int_or_none(t.get('DurationInSeconds')) or parse_duration(
- t.get('Duration') or t.get('DurationStr'))
- view_count = int_or_none(t.get('PlayCount') or t.get('PlayCount_hr'))
-
- track = t.get('Name') or t.get('Name_Text_HTML')
- artist = t.get('Author') or t.get('Author_Text_HTML')
-
- if track:
- title = '%s - %s' % (artist, track) if artist else track
- else:
- title = audio_id
-
- return {
- 'extractor_key': MailRuMusicIE.ie_key(),
- 'id': audio_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'duration': duration,
- 'view_count': view_count,
- 'vcodec': 'none',
- 'abr': int_or_none(t.get('BitRate')),
- 'track': track,
- 'artist': artist,
- 'album': t.get('Album'),
- 'url': audio_url,
- }
-
-
-class MailRuMusicIE(MailRuMusicSearchBaseIE):
- IE_NAME = 'mailru:music'
- IE_DESC = 'Музыка@Mail.Ru'
- _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)'
- _TESTS = [{
- 'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
- 'md5': '0f8c22ef8c5d665b13ac709e63025610',
- 'info_dict': {
- 'id': '4e31f7125d0dfaef505d947642366893',
- 'ext': 'mp3',
- 'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
- 'uploader': 'Игорь Мудрый',
- 'uploader_id': '1459196328',
- 'duration': 280,
- 'view_count': int,
- 'vcodec': 'none',
- 'abr': 320,
- 'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
- 'artist': 'М8Л8ТХ',
- },
- }]
-
- def _real_extract(self, url):
- audio_id = self._match_id(url)
-
- webpage = self._download_webpage(url, audio_id)
-
- title = self._og_search_title(webpage)
- music_data = self._search(title, url, audio_id)['MusicData']
- t = next(t for t in music_data if t.get('File') == audio_id)
-
- info = self._extract_track(t)
- info['title'] = title
- return info
-
-
-class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
- IE_NAME = 'mailru:music:search'
- IE_DESC = 'Музыка@Mail.Ru'
- _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://my.mail.ru/music/search/black%20shadow',
- 'info_dict': {
- 'id': 'black shadow',
- },
- 'playlist_mincount': 532,
- }]
-
- def _real_extract(self, url):
- query = compat_urllib_parse_unquote(self._match_id(url))
-
- entries = []
-
- LIMIT = 100
- offset = 0
-
- for _ in itertools.count(1):
- search = self._search(query, url, query, LIMIT, offset)
-
- music_data = search.get('MusicData')
- if not music_data or not isinstance(music_data, list):
- break
-
- for t in music_data:
- track = self._extract_track(t, fatal=False)
- if track:
- entries.append(track)
-
- total = try_get(
- search, lambda x: x['Results']['music']['Total'], int)
-
- if total is not None:
- if offset > total:
- break
-
- offset += LIMIT
-
- return self.playlist_result(entries, query)
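
MailRuMusicSearchIE is a textbook offset/limit pagination loop: request pages of 100 until the service stops returning items or the reported total is exhausted. The same skeleton reduced to a standalone sketch, with a fake fetch function standing in for the AJAX call:

import itertools

LIMIT = 100

def fetch_page(offset, limit):
    # Stand-in for the music.search AJAX request above.
    data = list(range(231))  # pretend the service knows 231 tracks
    return {'MusicData': data[offset:offset + limit], 'Total': len(data)}

entries = []
offset = 0
for _ in itertools.count(1):
    page = fetch_page(offset, LIMIT)
    items = page.get('MusicData')
    if not items:
        break
    entries.extend(items)
    total = page.get('Total')
    if total is not None and offset + LIMIT >= total:
        break
    offset += LIMIT

assert len(entries) == 231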
diff --git a/youtube_dl/extractor/makertv.py b/youtube_dl/extractor/makertv.py
deleted file mode 100644
index 8eda69cfc..000000000
--- a/youtube_dl/extractor/makertv.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class MakerTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www\.)?maker\.tv/(?:[^/]+/)*video|makerplayer\.com/embed/maker)/(?P<id>[a-zA-Z0-9]{12})'
- _TEST = {
- 'url': 'http://www.maker.tv/video/Fh3QgymL9gsc',
- 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
- 'info_dict': {
- 'id': 'Fh3QgymL9gsc',
- 'ext': 'mp4',
- 'title': 'Maze Runner: The Scorch Trials Official Movie Review',
- 'description': 'md5:11ff3362d7ef1d679fdb649f6413975a',
- 'upload_date': '20150918',
- 'timestamp': 1442549540,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- jwplatform_id = self._search_regex(r'jw_?id="([^"]+)"', webpage, 'jwplatform id')
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'url': 'jwplatform:%s' % jwplatform_id,
- 'ie_key': 'JWPlatform',
- }
diff --git a/youtube_dl/extractor/malltv.py b/youtube_dl/extractor/malltv.py
deleted file mode 100644
index e13c2e11a..000000000
--- a/youtube_dl/extractor/malltv.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import merge_dicts
-
-
-class MallTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
- 'md5': '1c4a37f080e1f3023103a7b43458e518',
- 'info_dict': {
- 'id': 't0zzt0',
- 'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
- 'ext': 'mp4',
- 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
- 'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
- 'duration': 216,
- 'timestamp': 1538870400,
- 'upload_date': '20181007',
- 'view_count': int,
- }
- }, {
- 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(
- url, display_id, headers=self.geo_verification_headers())
-
- SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
- video_id = self._search_regex(
- SOURCE_RE, webpage, 'video id', group='id')
-
- media = self._parse_html5_media_entries(
- url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
- m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
-
- info = self._search_json_ld(webpage, video_id, default={})
-
- return merge_dicts(media, info, {
- 'id': video_id,
- 'display_id': display_id,
- 'title': self._og_search_title(webpage, default=None) or display_id,
- 'description': self._og_search_description(webpage, default=None),
- 'thumbnail': self._og_search_thumbnail(webpage, default=None),
- })
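
Two techniques combine in MallTVIE: the <source src=...> attributes are rewritten in-page so the HTML5 media parser sees complete .m3u8 URLs, and merge_dicts layers the parsed media entry, JSON-LD and OpenGraph data with earlier sources winning. A simplified re-implementation of the merge semantics (youtube-dl's real merge_dicts additionally lets a non-empty string replace an empty one):

def merge_dicts(*dicts):
    # The first dict that supplies a non-None value for a key wins.
    merged = {}
    for d in dicts:
        for k, v in d.items():
            if v is not None and merged.get(k) is None:
                merged[k] = v
    return merged

media = {'id': 't0zzt0', 'formats': ['hls'], 'title': None}
json_ld = {'title': '18 miliard pro neziskovky...', 'duration': 216}
fallback = {'title': 'display-id-slug', 'duration': None}
info = merge_dicts(media, json_ld, fallback)
assert info['title'] == '18 miliard pro neziskovky...' and info['duration'] == 216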
diff --git a/youtube_dl/extractor/mangomolo.py b/youtube_dl/extractor/mangomolo.py
deleted file mode 100644
index 482175a34..000000000
--- a/youtube_dl/extractor/mangomolo.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_urllib_parse_unquote,
-)
-from ..utils import int_or_none
-
-
-class MangomoloBaseIE(InfoExtractor):
- def _get_real_id(self, page_id):
- return page_id
-
- def _real_extract(self, url):
- page_id = self._get_real_id(self._match_id(url))
- webpage = self._download_webpage(url, page_id)
- hidden_inputs = self._hidden_inputs(webpage)
- m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
-
- format_url = self._html_search_regex(
- [
- r'file\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
- r'<a[^>]+href="(rtsp://[^"]+)"'
- ], webpage, 'format url')
- formats = self._extract_wowza_formats(
- format_url, page_id, m3u8_entry_protocol, ['smil'])
- self._sort_formats(formats)
-
- return {
- 'id': page_id,
- 'title': self._live_title(page_id) if self._IS_LIVE else page_id,
- 'uploader_id': hidden_inputs.get('userid'),
- 'duration': int_or_none(hidden_inputs.get('duration')),
- 'is_live': self._IS_LIVE,
- 'formats': formats,
- }
-
-
-class MangomoloVideoIE(MangomoloBaseIE):
- IE_NAME = 'mangomolo:video'
- _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/video\?.*?\bid=(?P<id>\d+)'
- _IS_LIVE = False
-
-
-class MangomoloLiveIE(MangomoloBaseIE):
- IE_NAME = 'mangomolo:live'
- _VALID_URL = r'https?://admin\.mangomolo\.com/analytics/index\.php/customers/embed/index\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
- _IS_LIVE = True
-
- def _get_real_id(self, page_id):
- return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode()
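
The live channel IDs handled by _get_real_id above are double-encoded: URL percent-encoding on the outside, base64 underneath. A standalone round-trip sketch (the channel id is fabricated):

import base64
from urllib.parse import quote, unquote

def get_real_id(page_id):
    # Live channel ids arrive URL-quoted base64; video ids pass through as-is.
    return base64.b64decode(unquote(page_id)).decode()

page_id = quote(base64.b64encode(b'42').decode())  # -> 'NDI%3D'
assert get_real_id(page_id) == '42'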
diff --git a/youtube_dl/extractor/mediaset.py b/youtube_dl/extractor/mediaset.py
deleted file mode 100644
index df3748798..000000000
--- a/youtube_dl/extractor/mediaset.py
+++ /dev/null
@@ -1,163 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .theplatform import ThePlatformBaseIE
-from ..compat import (
- compat_parse_qs,
- compat_str,
- compat_urllib_parse_urlparse,
-)
-from ..utils import (
- ExtractorError,
- int_or_none,
- update_url_query,
-)
-
-
-class MediasetIE(ThePlatformBaseIE):
- _TP_TLD = 'eu'
- _VALID_URL = r'''(?x)
- (?:
- mediaset:|
- https?://
- (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
- (?:
- (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
- player/index\.html\?.*?\bprogramGuid=
- )
- )(?P<id>[0-9A-Z]{16})
- '''
- _TESTS = [{
- # full episode
- 'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
- 'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
- 'info_dict': {
- 'id': 'FAFU000000661824',
- 'ext': 'mp4',
- 'title': 'Quarta puntata',
- 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1414.26,
- 'upload_date': '20161107',
- 'series': 'Hello Goodbye',
- 'timestamp': 1478532900,
- 'uploader': 'Rete 4',
- 'uploader_id': 'R4',
- },
- }, {
- 'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
- 'md5': '288532f0ad18307705b01e581304cd7b',
- 'info_dict': {
- 'id': 'F309013801000501',
- 'ext': 'mp4',
- 'title': 'Puntata del 25 maggio',
- 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 6565.007,
- 'upload_date': '20180526',
- 'series': 'Matrix',
- 'timestamp': 1527326245,
- 'uploader': 'Canale 5',
- 'uploader_id': 'C5',
- },
- 'expected_warnings': ['HTTP Error 403: Forbidden'],
- }, {
- # clip
- 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
- 'only_matching': True,
- }, {
- # iframe simple
- 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
- 'only_matching': True,
- }, {
- # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
- 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
- 'only_matching': True,
- }, {
- 'url': 'mediaset:FAFU000000665924',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(ie, webpage):
- def _qs(url):
- return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
-
- def _program_guid(qs):
- return qs.get('programGuid', [None])[0]
-
- entries = []
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
- webpage):
- embed_url = mobj.group('url')
- embed_qs = _qs(embed_url)
- program_guid = _program_guid(embed_qs)
- if program_guid:
- entries.append(embed_url)
- continue
- video_id = embed_qs.get('id', [None])[0]
- if not video_id:
- continue
- urlh = ie._request_webpage(
- embed_url, video_id, note='Following embed URL redirect')
- embed_url = compat_str(urlh.geturl())
- program_guid = _program_guid(_qs(embed_url))
- if program_guid:
- entries.append(embed_url)
- return entries
-
- def _real_extract(self, url):
- guid = self._match_id(url)
- tp_path = 'PR1GhC/media/guid/2702976343/' + guid
- info = self._extract_theplatform_metadata(tp_path, guid)
-
- formats = []
- subtitles = {}
- first_e = None
- for asset_type in ('SD', 'HD'):
- for f in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
- try:
- tp_formats, tp_subtitles = self._extract_theplatform_smil(
- update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
- 'mbr': 'true',
- 'formats': f,
- 'assetTypes': asset_type,
- }), guid, 'Downloading %s %s SMIL data' % (f, asset_type))
- except ExtractorError as e:
- if not first_e:
- first_e = e
- break
- for tp_f in tp_formats:
- tp_f['quality'] = 1 if asset_type == 'HD' else 0
- formats.extend(tp_formats)
- subtitles = self._merge_subtitles(subtitles, tp_subtitles)
- if first_e and not formats:
- raise first_e
- self._sort_formats(formats)
-
- fields = []
- for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
- fields.extend(templ % repl for repl in repls)
- feed_data = self._download_json(
- 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
- guid, fatal=False, query={'fields': ','.join(fields)})
- if feed_data:
- publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
- info.update({
- 'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
- 'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
- 'series': feed_data.get('mediasetprogram$brandTitle'),
- 'uploader': publish_info.get('description'),
- 'uploader_id': publish_info.get('channel'),
- 'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
- })
-
- info.update({
- 'id': guid,
- 'formats': formats,
- 'subtitles': subtitles,
- })
- return info
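
The format loop in MediasetIE tries every (asset type, container format) pair against ThePlatform, remembers only the first failure, and re-raises it only when nothing succeeded at all, so a 403 on the HD asset type degrades gracefully instead of aborting the extraction. The control-flow skeleton as a standalone sketch (fetch_smil is a fake that fails for HD):

first_error = None
formats = []

def fetch_smil(asset_type, fmt):
    # Stand-in for _extract_theplatform_smil; pretend HD is forbidden.
    if asset_type == 'HD':
        raise IOError('HTTP Error 403: Forbidden')
    return ['%s-%s' % (asset_type, fmt)]

for asset_type in ('SD', 'HD'):
    for fmt in ('MPEG4', 'MPEG-DASH', 'M3U', 'ISM'):
        try:
            formats.extend(fetch_smil(asset_type, fmt))
        except IOError as e:
            if first_error is None:
                first_error = e
            break  # this asset type is broken; move on to the next one

if first_error and not formats:
    raise first_error  # fatal only when every variant failed

assert formats == ['SD-MPEG4', 'SD-MPEG-DASH', 'SD-M3U', 'SD-ISM']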
diff --git a/youtube_dl/extractor/mediasite.py b/youtube_dl/extractor/mediasite.py
deleted file mode 100644
index 694a264d6..000000000
--- a/youtube_dl/extractor/mediasite.py
+++ /dev/null
@@ -1,366 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import json
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- ExtractorError,
- float_or_none,
- mimetype2ext,
- str_or_none,
- try_get,
- unescapeHTML,
- unsmuggle_url,
- url_or_none,
- urljoin,
-)
-
-
-_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
-
-
-class MediasiteIE(InfoExtractor):
- _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
- _TESTS = [
- {
- 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
- 'info_dict': {
- 'id': '2db6c271681e4f199af3c60d1f82869b1d',
- 'ext': 'mp4',
- 'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles',
- 'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.',
- 'timestamp': 1474268400.0,
- 'upload_date': '20160919',
- },
- },
- {
- 'url': 'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb',
- 'info_dict': {
- 'id': '90bb363295d945d6b548c867d01181361d',
- 'ext': 'mp4',
- 'upload_date': '20150429',
- 'title': '5) IT-forum 2015-Dag 1 - Dungbeetle - How and why Rain created a tiny bug tracker for Unity',
- 'timestamp': 1430311380.0,
- },
- },
- {
- 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
- 'md5': '481fda1c11f67588c0d9d8fbdced4e39',
- 'info_dict': {
- 'id': '585a43626e544bdd97aeb71a0ec907a01d',
- 'ext': 'mp4',
- 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
- 'description': '',
- 'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
- 'duration': 7713.088,
- 'timestamp': 1413309600,
- 'upload_date': '20141014',
- },
- },
- {
- 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
- 'md5': 'ef1fdded95bdf19b12c5999949419c92',
- 'info_dict': {
- 'id': '86a9ea9f53e149079fbdb4202b521ed21d',
- 'ext': 'wmv',
- 'title': '64ste Vakantiecursus: Afvalwater',
- 'description': 'md5:7fd774865cc69d972f542b157c328305',
- 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
- 'duration': 10853,
- 'timestamp': 1326446400,
- 'upload_date': '20120113',
- },
- },
- {
- 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
- 'md5': '9422edc9b9a60151727e4b6d8bef393d',
- 'info_dict': {
- 'id': '24aace4429fc450fb5b38cdbf424a66e1d',
- 'ext': 'mp4',
- 'title': 'Xyce Software Training - Section 1',
- 'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}',
- 'upload_date': '20120409',
- 'timestamp': 1333983600,
- 'duration': 7794,
- }
- },
- {
- 'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
- 'only_matching': True,
- },
- {
- 'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
- 'only_matching': True,
- },
- {
- # dashed id
- 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d',
- 'only_matching': True,
- }
- ]
-
- # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
- _STREAM_TYPES = {
- 0: 'video1', # the main video
- 2: 'slide',
- 3: 'presentation',
- 4: 'video2', # screencast?
- 5: 'video3',
- }
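# Editorial note (not in the original source): stream types missing from the
# mapping above fall back to a generated name in _real_extract, e.g. an
# unmapped StreamType of 7 yields format ids of the form 'type7-<snum>.<unum>'.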
-
- @staticmethod
- def _extract_urls(webpage):
- return [
- unescapeHTML(mobj.group('url'))
- for mobj in re.finditer(
- r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
- webpage)]
-
- def _real_extract(self, url):
- url, data = unsmuggle_url(url, {})
- mobj = re.match(self._VALID_URL, url)
- resource_id = mobj.group('id')
- query = mobj.group('query')
-
- webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
- redirect_url = compat_str(urlh.geturl())
-
- # XXX: might have also extracted UrlReferrer and QueryString from the html
- service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
- r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id,
- default='/Mediasite/PlayerService/PlayerService.svc/json'))
-
- player_options = self._download_json(
- '%s/GetPlayerOptions' % service_path, resource_id,
- headers={
- 'Content-type': 'application/json; charset=utf-8',
- 'X-Requested-With': 'XMLHttpRequest',
- },
- data=json.dumps({
- 'getPlayerOptionsRequest': {
- 'ResourceId': resource_id,
- 'QueryString': query,
- 'UrlReferrer': data.get('UrlReferrer', ''),
- 'UseScreenReader': False,
- }
- }).encode('utf-8'))['d']
-
-        presentation = player_options['Presentation']
-        if presentation is None:
-            raise ExtractorError(
-                'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'],
-                expected=True)
-
-        title = presentation['Title']
-
- thumbnails = []
- formats = []
- for snum, Stream in enumerate(presentation['Streams']):
- stream_type = Stream.get('StreamType')
- if stream_type is None:
- continue
-
- video_urls = Stream.get('VideoUrls')
- if not isinstance(video_urls, list):
- video_urls = []
-
- stream_id = self._STREAM_TYPES.get(
- stream_type, 'type%u' % stream_type)
-
- stream_formats = []
- for unum, VideoUrl in enumerate(video_urls):
- video_url = url_or_none(VideoUrl.get('Location'))
- if not video_url:
- continue
- # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
-
- media_type = VideoUrl.get('MediaType')
- if media_type == 'SS':
- stream_formats.extend(self._extract_ism_formats(
- video_url, resource_id,
- ism_id='%s-%u.%u' % (stream_id, snum, unum),
- fatal=False))
- elif media_type == 'Dash':
- stream_formats.extend(self._extract_mpd_formats(
- video_url, resource_id,
- mpd_id='%s-%u.%u' % (stream_id, snum, unum),
- fatal=False))
- else:
- stream_formats.append({
- 'format_id': '%s-%u.%u' % (stream_id, snum, unum),
- 'url': video_url,
- 'ext': mimetype2ext(VideoUrl.get('MimeType')),
- })
-
- # TODO: if Stream['HasSlideContent']:
- # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
- # from Stream['Slides']
- # this will require writing a custom downloader...
-
- # disprefer 'secondary' streams
- if stream_type != 0:
- for fmt in stream_formats:
- fmt['preference'] = -1
-
- thumbnail_url = Stream.get('ThumbnailUrl')
- if thumbnail_url:
- thumbnails.append({
- 'id': '%s-%u' % (stream_id, snum),
- 'url': urljoin(redirect_url, thumbnail_url),
- 'preference': -1 if stream_type != 0 else 0,
- })
- formats.extend(stream_formats)
-
- self._sort_formats(formats)
-
- # XXX: Presentation['Presenters']
- # XXX: Presentation['Transcript']
-
- return {
- 'id': resource_id,
- 'title': title,
- 'description': presentation.get('Description'),
- 'duration': float_or_none(presentation.get('Duration'), 1000),
- 'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
- 'formats': formats,
- 'thumbnails': thumbnails,
- }
-
-
-class MediasiteCatalogIE(InfoExtractor):
- _VALID_URL = r'''(?xi)
- (?P<url>https?://[^/]+/Mediasite)
- /Catalog/Full/
- (?P<catalog_id>{0})
- (?:
- /(?P<current_folder_id>{0})
- /(?P<root_dynamic_folder_id>{0})
- )?
- '''.format(_ID_RE)
- _TESTS = [{
- 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48530d454381549f955d08c75e21',
- 'info_dict': {
- 'id': '631f9e48530d454381549f955d08c75e21',
- 'title': 'WCET Summit: Adaptive Learning in Higher Ed: Improving Outcomes Dynamically',
- },
- 'playlist_count': 6,
- 'expected_warnings': ['is not a supported codec'],
- }, {
- # with CurrentFolderId and RootDynamicFolderId
- 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
- 'info_dict': {
- 'id': '9518c4a6c5cf4993b21cbd53e828a92521',
- 'title': 'IUSM Family and Friends Sessions',
- },
- 'playlist_count': 2,
- }, {
- 'url': 'http://uipsyc.mediasite.com/mediasite/Catalog/Full/d5d79287c75243c58c50fef50174ec1b21',
- 'only_matching': True,
- }, {
- # no AntiForgeryToken
- 'url': 'https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21',
- 'only_matching': True,
- }, {
- 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
- 'only_matching': True,
- }, {
- # dashed id
- 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- mediasite_url = mobj.group('url')
- catalog_id = mobj.group('catalog_id')
- current_folder_id = mobj.group('current_folder_id') or catalog_id
- root_dynamic_folder_id = mobj.group('root_dynamic_folder_id')
-
- webpage = self._download_webpage(url, catalog_id)
-
- # AntiForgeryToken is optional (e.g. [1])
- # 1. https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21
- anti_forgery_token = self._search_regex(
- r'AntiForgeryToken\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
- webpage, 'anti forgery token', default=None, group='value')
- if anti_forgery_token:
- anti_forgery_header = self._search_regex(
- r'AntiForgeryHeaderName\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
- webpage, 'anti forgery header name',
- default='X-SOFO-AntiForgeryHeader', group='value')
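# Editorial note (not in the original source): when the catalog page embeds
# an AntiForgeryToken, the matching header name is scraped from the same page
# (defaulting to 'X-SOFO-AntiForgeryHeader') and both are replayed on the
# JSON request below; pages without a token simply omit the header.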
-
- data = {
- 'IsViewPage': True,
- 'IsNewFolder': True,
- 'AuthTicket': None,
- 'CatalogId': catalog_id,
- 'CurrentFolderId': current_folder_id,
- 'RootDynamicFolderId': root_dynamic_folder_id,
- 'ItemsPerPage': 1000,
- 'PageIndex': 0,
- 'PermissionMask': 'Execute',
- 'CatalogSearchType': 'SearchInFolder',
- 'SortBy': 'Date',
- 'SortDirection': 'Descending',
- 'StartDate': None,
- 'EndDate': None,
- 'StatusFilterList': None,
- 'PreviewKey': None,
- 'Tags': [],
- }
-
- headers = {
- 'Content-Type': 'application/json; charset=UTF-8',
- 'Referer': url,
- 'X-Requested-With': 'XMLHttpRequest',
- }
- if anti_forgery_token:
- headers[anti_forgery_header] = anti_forgery_token
-
- catalog = self._download_json(
- '%s/Catalog/Data/GetPresentationsForFolder' % mediasite_url,
- catalog_id, data=json.dumps(data).encode(), headers=headers)
-
- entries = []
- for video in catalog['PresentationDetailsList']:
- if not isinstance(video, dict):
- continue
- video_id = str_or_none(video.get('Id'))
- if not video_id:
- continue
- entries.append(self.url_result(
- '%s/Play/%s' % (mediasite_url, video_id),
- ie=MediasiteIE.ie_key(), video_id=video_id))
-
- title = try_get(
- catalog, lambda x: x['CurrentFolder']['Name'], compat_str)
-
-        return self.playlist_result(entries, catalog_id, title)
-
-
-class MediasiteNamedCatalogIE(InfoExtractor):
- _VALID_URL = r'(?xi)(?P<url>https?://[^/]+/Mediasite)/Catalog/catalogs/(?P<catalog_name>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- mediasite_url = mobj.group('url')
- catalog_name = mobj.group('catalog_name')
-
- webpage = self._download_webpage(url, catalog_name)
-
- catalog_id = self._search_regex(
- r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, webpage, 'catalog id')
-
- return self.url_result(
- '%s/Catalog/Full/%s' % (mediasite_url, catalog_id),
- ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id)
diff --git a/youtube_dl/extractor/minhateca.py b/youtube_dl/extractor/minhateca.py
deleted file mode 100644
index dccc54249..000000000
--- a/youtube_dl/extractor/minhateca.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_duration,
- parse_filesize,
- sanitized_Request,
- urlencode_postdata,
-)
-
-
-class MinhatecaIE(InfoExtractor):
- _VALID_URL = r'https?://minhateca\.com\.br/[^?#]+,(?P<id>[0-9]+)\.'
- _TEST = {
- 'url': 'http://minhateca.com.br/pereba/misc/youtube-dl+test+video,125848331.mp4(video)',
- 'info_dict': {
- 'id': '125848331',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'filesize_approx': 1530000,
- 'duration': 9,
- 'view_count': int,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- token = self._html_search_regex(
- r'<input name="__RequestVerificationToken".*?value="([^"]+)"',
- webpage, 'request token')
- token_data = [
- ('fileId', video_id),
- ('__RequestVerificationToken', token),
- ]
- req = sanitized_Request(
- 'http://minhateca.com.br/action/License/Download',
- data=urlencode_postdata(token_data))
- req.add_header('Content-Type', 'application/x-www-form-urlencoded')
- data = self._download_json(
- req, video_id, note='Downloading metadata')
-
- video_url = data['redirectUrl']
- title_str = self._html_search_regex(
- r'<h1.*?>(.*?)</h1>', webpage, 'title')
- title, _, ext = title_str.rpartition('.')
- filesize_approx = parse_filesize(self._html_search_regex(
- r'<p class="fileSize">(.*?)</p>',
- webpage, 'file size approximation', fatal=False))
-        duration = parse_duration(self._html_search_regex(
-            # the character classes below also match the site's misspelling 'fileLenght'
-            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
-            webpage, 'duration', fatal=False))
- view_count = int_or_none(self._html_search_regex(
- r'<p class="downloadsCounter">([0-9]+)</p>',
- webpage, 'view count', fatal=False))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'ext': ext,
- 'filesize_approx': filesize_approx,
- 'duration': duration,
- 'view_count': view_count,
- 'thumbnail': self._og_search_thumbnail(webpage),
- }
diff --git a/youtube_dl/extractor/mit.py b/youtube_dl/extractor/mit.py
deleted file mode 100644
index 1aea78d11..000000000
--- a/youtube_dl/extractor/mit.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import json
-
-from .common import InfoExtractor
-from .youtube import YoutubeIE
-from ..utils import (
- clean_html,
- ExtractorError,
- get_element_by_id,
-)
-
-
-class TechTVMITIE(InfoExtractor):
- IE_NAME = 'techtv.mit.edu'
- _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
- 'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7',
- 'info_dict': {
- 'id': '25418',
- 'ext': 'mp4',
- 'title': 'MIT DNA and Protein Sets',
- 'description': 'md5:46f5c69ce434f0a97e7c628cc142802d',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- raw_page = self._download_webpage(
- 'http://techtv.mit.edu/videos/%s' % video_id, video_id)
- clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
-
- base_url = self._proto_relative_url(self._search_regex(
- r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url'), 'http:')
- formats_json = self._search_regex(
- r'bitrates: (\[.+?\])', raw_page, 'video formats')
- formats_mit = json.loads(formats_json)
- formats = [
- {
- 'format_id': f['label'],
- 'url': base_url + f['url'].partition(':')[2],
- 'ext': f['url'].partition(':')[0],
- 'format': f['label'],
- 'width': f['width'],
- 'vbr': f['bitrate'],
- }
- for f in formats_mit
- ]
-
- title = get_element_by_id('edit-title', clean_page)
- description = clean_html(get_element_by_id('edit-description', clean_page))
- thumbnail = self._search_regex(
- r'playlist:.*?url: \'(.+?)\'',
- raw_page, 'thumbnail', flags=re.DOTALL)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'description': description,
- 'thumbnail': thumbnail,
- }
-
-
-class MITIE(TechTVMITIE):
- IE_NAME = 'video.mit.edu'
- _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'
-
- _TEST = {
- 'url': 'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
- 'md5': '7db01d5ccc1895fc5010e9c9e13648da',
- 'info_dict': {
- 'id': '21783',
- 'ext': 'mp4',
- 'title': 'The Government is Profiling You',
- 'description': 'md5:ad5795fe1e1623b73620dbfd47df9afd',
- },
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- page_title = mobj.group('title')
- webpage = self._download_webpage(url, page_title)
- embed_url = self._search_regex(
- r'<iframe .*?src="(.+?)"', webpage, 'embed url')
- return self.url_result(embed_url)
-
-
-class OCWMITIE(InfoExtractor):
- IE_NAME = 'ocw.mit.edu'
- _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
- _BASE_URL = 'http://ocw.mit.edu/'
-
- _TESTS = [
- {
- 'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
- 'info_dict': {
- 'id': 'EObHWIEKGjA',
- 'ext': 'webm',
- 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
- 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
- 'upload_date': '20121109',
- 'uploader_id': 'MIT',
- 'uploader': 'MIT OpenCourseWare',
- }
- },
- {
- 'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/',
- 'info_dict': {
- 'id': '7K1sB05pE0A',
- 'ext': 'mp4',
- 'title': 'Session 1: Introduction to Derivatives',
- 'upload_date': '20090818',
- 'uploader_id': 'MIT',
- 'uploader': 'MIT OpenCourseWare',
- 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
- }
- }
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- topic = mobj.group('topic')
-
- webpage = self._download_webpage(url, topic)
- title = self._html_search_meta('WT.cg_s', webpage)
- description = self._html_search_meta('Description', webpage)
-
- # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file)
- embed_chapter_media = re.search(r'ocw_embed_chapter_media\((.+?)\)', webpage)
- if embed_chapter_media:
- metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
- metadata = re.split(r', ?', metadata)
- yt = metadata[1]
- else:
-            # search for call to ocw_embed_media(container_id, media_url, provider, page_url, image_url, captions_file)
- embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
- if embed_media:
- metadata = re.sub(r'[\'"]', '', embed_media.group(1))
- metadata = re.split(r', ?', metadata)
- yt = metadata[1]
- else:
- raise ExtractorError('Unable to find embedded YouTube video.')
- video_id = YoutubeIE.extract_id(yt)
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'url': yt,
- 'ie_key': 'Youtube',
- }
diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py
deleted file mode 100644
index 40f214a87..000000000
--- a/youtube_dl/extractor/mitele.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- smuggle_url,
- parse_duration,
-)
-
-
-class MiTeleIE(InfoExtractor):
- IE_DESC = 'mitele.es'
- _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
-
- _TESTS = [{
- 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
- 'info_dict': {
- 'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
- 'ext': 'mp4',
- 'title': 'Tor, la web invisible',
- 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
- 'series': 'Diario de',
- 'season': 'La redacción',
- 'season_number': 14,
- 'season_id': 'diario_de_t14_11981',
- 'episode': 'Programa 144',
- 'episode_number': 3,
- 'thumbnail': r're:(?i)^https?://.*\.jpg$',
- 'duration': 2913,
- },
- 'add_ie': ['Ooyala'],
- }, {
- # no explicit title
- 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
- 'info_dict': {
- 'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
- 'ext': 'mp4',
- 'title': 'Cuarto Milenio Temporada 6 Programa 226',
- 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
- 'series': 'Cuarto Milenio',
- 'season': 'Temporada 6',
- 'season_number': 6,
- 'season_id': 'cuarto_milenio_t06_12715',
- 'episode': 'Programa 226',
- 'episode_number': 24,
- 'thumbnail': r're:(?i)^https?://.*\.jpg$',
- 'duration': 7313,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Ooyala'],
- }, {
- 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- paths = self._download_json(
- 'https://www.mitele.es/amd/agp/web/metadata/general_configuration',
- video_id, 'Downloading paths JSON')
-
- ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search']
- base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com')
- full_path = ooyala_s.get('full_path', '/search/v1/full/providers/')
- source = self._download_json(
- '%s://%s%s%s/docs/%s' % (
- ooyala_s.get('protocol', 'https'), base_url, full_path,
- ooyala_s.get('provider_id', '104951'), video_id),
- video_id, 'Downloading data JSON', query={
- 'include_titles': 'Series,Season',
- 'product_name': ooyala_s.get('product_name', 'test'),
- 'format': 'full',
- })['hits']['hits'][0]['_source']
-
- embedCode = source['offers'][0]['embed_codes'][0]
- titles = source['localizable_titles'][0]
-
- title = titles.get('title_medium') or titles['title_long']
-
- description = titles.get('summary_long') or titles.get('summary_medium')
-
- def get(key1, key2):
- value1 = source.get(key1)
- if not value1 or not isinstance(value1, list):
- return
- if not isinstance(value1[0], dict):
- return
- return value1[0].get(key2)
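# Editorial sketch (not in the original source): get() digs one level into
# Ooyala's list-of-dicts fields and returns None on any shape mismatch, e.g.:
#   source = {'videos': [{'duration': '45:00'}]}   # hypothetical payload
#   get('videos', 'duration')                      # -> '45:00'
#   get('missing', 'duration')                     # -> None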
-
- series = get('localizable_titles_series', 'title_medium')
-
- season = get('localizable_titles_season', 'title_medium')
- season_number = int_or_none(source.get('season_number'))
- season_id = source.get('season_id')
-
- episode = titles.get('title_sort_name')
- episode_number = int_or_none(source.get('episode_number'))
-
- duration = parse_duration(get('videos', 'duration'))
-
- return {
- '_type': 'url_transparent',
- # for some reason only HLS is supported
- 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}),
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'series': series,
- 'season': season,
- 'season_number': season_number,
- 'season_id': season_id,
- 'episode': episode,
- 'episode_number': episode_number,
- 'duration': duration,
- 'thumbnail': get('images', 'url'),
- }
diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py
deleted file mode 100644
index bf5353ef9..000000000
--- a/youtube_dl/extractor/mixcloud.py
+++ /dev/null
@@ -1,398 +0,0 @@
-from __future__ import unicode_literals
-
-import functools
-import itertools
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_chr,
- compat_ord,
- compat_str,
- compat_urllib_parse_unquote,
- compat_urlparse,
- compat_zip
-)
-from ..utils import (
- clean_html,
- ExtractorError,
- int_or_none,
- OnDemandPagedList,
- str_to_int,
- try_get,
- urljoin,
-)
-
-
-class MixcloudIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
- IE_NAME = 'mixcloud'
-
- _TESTS = [{
- 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
- 'info_dict': {
- 'id': 'dholbach-cryptkeeper',
- 'ext': 'm4a',
- 'title': 'Cryptkeeper',
- 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
- 'uploader': 'Daniel Holbach',
- 'uploader_id': 'dholbach',
- 'thumbnail': r're:https?://.*\.jpg',
- 'view_count': int,
- },
- }, {
- 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
- 'info_dict': {
- 'id': 'gillespeterson-caribou-7-inch-vinyl-mix-chat',
- 'ext': 'mp3',
- 'title': 'Caribou 7 inch Vinyl Mix & Chat',
- 'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
- 'uploader': 'Gilles Peterson Worldwide',
- 'uploader_id': 'gillespeterson',
- 'thumbnail': 're:https?://.*',
- 'view_count': int,
- },
- }, {
- 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _decrypt_xor_cipher(key, ciphertext):
- """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
- return ''.join([
- compat_chr(compat_ord(ch) ^ compat_ord(k))
- for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
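# Editorial sketch (not in the original source): XOR is its own inverse, so
# applying the helper twice with the same key restores the input, which is
# why a single function serves both directions:
#   ct = MixcloudIE._decrypt_xor_cipher('key', 'https://example.invalid/a.m4a')
#   assert MixcloudIE._decrypt_xor_cipher('key', ct) == 'https://example.invalid/a.m4a'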
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- uploader = mobj.group(1)
- cloudcast_name = mobj.group(2)
- track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name)))
-
- webpage = self._download_webpage(url, track_id)
-
- # Legacy path
- encrypted_play_info = self._search_regex(
- r'm-play-info="([^"]+)"', webpage, 'play info', default=None)
-
- if encrypted_play_info is not None:
- # Decode
- encrypted_play_info = compat_b64decode(encrypted_play_info)
- else:
- # New path
- full_info_json = self._parse_json(self._html_search_regex(
- r'<script id="relay-data" type="text/x-mixcloud">([^<]+)</script>',
- webpage, 'play info'), 'play info')
- for item in full_info_json:
- item_data = try_get(
- item, lambda x: x['cloudcast']['data']['cloudcastLookup'],
- dict)
- if try_get(item_data, lambda x: x['streamInfo']['url']):
- info_json = item_data
- break
- else:
- raise ExtractorError('Failed to extract matching stream info')
-
- message = self._html_search_regex(
- r'(?s)<div[^>]+class="global-message cloudcast-disabled-notice-light"[^>]*>(.+?)<(?:a|/div)',
- webpage, 'error message', default=None)
-
- js_url = self._search_regex(
- r'<script[^>]+\bsrc=["\"](https://(?:www\.)?mixcloud\.com/media/(?:js2/www_js_4|js/www)\.[^>]+\.js)',
- webpage, 'js url')
- js = self._download_webpage(js_url, track_id, 'Downloading JS')
- # Known plaintext attack
- if encrypted_play_info:
- kps = ['{"stream_url":']
- kpa_target = encrypted_play_info
- else:
- kps = ['https://', 'http://']
- kpa_target = compat_b64decode(info_json['streamInfo']['url'])
- for kp in kps:
- partial_key = self._decrypt_xor_cipher(kpa_target, kp)
- for quote in ["'", '"']:
- key = self._search_regex(
- r'{0}({1}[^{0}]*){0}'.format(quote, re.escape(partial_key)),
- js, 'encryption key', default=None)
- if key is not None:
- break
- else:
- continue
- break
- else:
- raise ExtractorError('Failed to extract encryption key')
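# Editorial sketch (not in the original source) of the known-plaintext attack
# above: XORing the ciphertext with a prefix it is known to decrypt to yields
# the key stream for that prefix, which is then searched for verbatim in the
# player JS. With a hypothetical key 'REALKEY':
#   ct = MixcloudIE._decrypt_xor_cipher('REALKEY', '{"stream_url":')
#   MixcloudIE._decrypt_xor_cipher(ct, '{"stream_url":')  # -> 'REALKEYREALKEY'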
-
- if encrypted_play_info is not None:
- play_info = self._parse_json(self._decrypt_xor_cipher(key, encrypted_play_info), 'play info')
- if message and 'stream_url' not in play_info:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
- song_url = play_info['stream_url']
- formats = [{
- 'format_id': 'normal',
- 'url': song_url
- }]
-
- title = self._html_search_regex(r'm-title="([^"]+)"', webpage, 'title')
- thumbnail = self._proto_relative_url(self._html_search_regex(
- r'm-thumbnail-url="([^"]+)"', webpage, 'thumbnail', fatal=False))
- uploader = self._html_search_regex(
- r'm-owner-name="([^"]+)"', webpage, 'uploader', fatal=False)
- uploader_id = self._search_regex(
- r'\s+"profile": "([^"]+)",', webpage, 'uploader id', fatal=False)
- description = self._og_search_description(webpage)
- view_count = str_to_int(self._search_regex(
- [r'<meta itemprop="interactionCount" content="UserPlays:([0-9]+)"',
- r'/listeners/?">([0-9,.]+)</a>',
- r'(?:m|data)-tooltip=["\']([\d,.]+) plays'],
- webpage, 'play count', default=None))
-
- else:
- title = info_json['name']
- thumbnail = urljoin(
- 'https://thumbnailer.mixcloud.com/unsafe/600x600/',
- try_get(info_json, lambda x: x['picture']['urlRoot'], compat_str))
- uploader = try_get(info_json, lambda x: x['owner']['displayName'])
- uploader_id = try_get(info_json, lambda x: x['owner']['username'])
- description = try_get(info_json, lambda x: x['description'])
- view_count = int_or_none(try_get(info_json, lambda x: x['plays']))
-
- stream_info = info_json['streamInfo']
- formats = []
-
- def decrypt_url(f_url):
- for k in (key, 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'):
- decrypted_url = self._decrypt_xor_cipher(k, f_url)
- if re.search(r'^https?://[0-9A-Za-z.]+/[0-9A-Za-z/.?=&_-]+$', decrypted_url):
- return decrypted_url
-
- for url_key in ('url', 'hlsUrl', 'dashUrl'):
- format_url = stream_info.get(url_key)
- if not format_url:
- continue
- decrypted = decrypt_url(compat_b64decode(format_url))
- if not decrypted:
- continue
- if url_key == 'hlsUrl':
- formats.extend(self._extract_m3u8_formats(
- decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- elif url_key == 'dashUrl':
- formats.extend(self._extract_mpd_formats(
- decrypted, track_id, mpd_id='dash', fatal=False))
- else:
- formats.append({
- 'format_id': 'http',
- 'url': decrypted,
- 'downloader_options': {
- # Mixcloud starts throttling at >~5M
- 'http_chunk_size': 5242880,
- },
- })
- self._sort_formats(formats)
-
- return {
- 'id': track_id,
- 'title': title,
- 'formats': formats,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'view_count': view_count,
- }
-
-
-class MixcloudPlaylistBaseIE(InfoExtractor):
- _PAGE_SIZE = 24
-
- def _find_urls_in_page(self, page):
- for url in re.findall(r'm-play-button m-url="(?P<url>[^"]+)"', page):
- yield self.url_result(
- compat_urlparse.urljoin('https://www.mixcloud.com', clean_html(url)),
- MixcloudIE.ie_key())
-
- def _fetch_tracks_page(self, path, video_id, page_name, current_page, real_page_number=None):
- real_page_number = real_page_number or current_page + 1
- return self._download_webpage(
- 'https://www.mixcloud.com/%s/' % path, video_id,
-            note='Downloading %s (page %d)' % (page_name, current_page + 1),
- errnote='Unable to download %s' % page_name,
- query={'page': real_page_number, 'list': 'main', '_ajax': '1'},
- headers={'X-Requested-With': 'XMLHttpRequest'})
-
- def _tracks_page_func(self, page, video_id, page_name, current_page):
- resp = self._fetch_tracks_page(page, video_id, page_name, current_page)
-
- for item in self._find_urls_in_page(resp):
- yield item
-
- def _get_user_description(self, page_content):
- return self._html_search_regex(
- r'<div[^>]+class="profile-bio"[^>]*>(.+?)</div>',
- page_content, 'user description', fatal=False)
-
-
-class MixcloudUserIE(MixcloudPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/(?P<type>uploads|favorites|listens)?/?$'
- IE_NAME = 'mixcloud:user'
-
- _TESTS = [{
- 'url': 'http://www.mixcloud.com/dholbach/',
- 'info_dict': {
- 'id': 'dholbach_uploads',
- 'title': 'Daniel Holbach (uploads)',
- 'description': 'md5:def36060ac8747b3aabca54924897e47',
- },
- 'playlist_mincount': 11,
- }, {
- 'url': 'http://www.mixcloud.com/dholbach/uploads/',
- 'info_dict': {
- 'id': 'dholbach_uploads',
- 'title': 'Daniel Holbach (uploads)',
- 'description': 'md5:def36060ac8747b3aabca54924897e47',
- },
- 'playlist_mincount': 11,
- }, {
- 'url': 'http://www.mixcloud.com/dholbach/favorites/',
- 'info_dict': {
- 'id': 'dholbach_favorites',
- 'title': 'Daniel Holbach (favorites)',
- 'description': 'md5:def36060ac8747b3aabca54924897e47',
- },
- 'params': {
- 'playlist_items': '1-100',
- },
- 'playlist_mincount': 100,
- }, {
- 'url': 'http://www.mixcloud.com/dholbach/listens/',
- 'info_dict': {
- 'id': 'dholbach_listens',
- 'title': 'Daniel Holbach (listens)',
- 'description': 'md5:def36060ac8747b3aabca54924897e47',
- },
- 'params': {
- 'playlist_items': '1-100',
- },
- 'playlist_mincount': 100,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user_id = mobj.group('user')
- list_type = mobj.group('type')
-
-        # if only a profile URL was supplied, default to downloading all uploads
- if list_type is None:
- list_type = 'uploads'
-
- video_id = '%s_%s' % (user_id, list_type)
-
- profile = self._download_webpage(
- 'https://www.mixcloud.com/%s/' % user_id, video_id,
- note='Downloading user profile',
- errnote='Unable to download user profile')
-
- username = self._og_search_title(profile)
- description = self._get_user_description(profile)
-
- entries = OnDemandPagedList(
- functools.partial(
- self._tracks_page_func,
- '%s/%s' % (user_id, list_type), video_id, 'list of %s' % list_type),
- self._PAGE_SIZE)
-
- return self.playlist_result(
- entries, video_id, '%s (%s)' % (username, list_type), description)
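# Editorial note (not in the original source): OnDemandPagedList calls
# _tracks_page_func lazily, one page at a time, so only the pages a
# --playlist-items selection actually touches are downloaded. A toy sketch,
# assuming the utils.OnDemandPagedList.getslice() interface:
#   pages = OnDemandPagedList(
#       lambda idx: ({'id': i} for i in range(idx * 24, idx * 24 + 24)), 24)
#   pages.getslice(0, 2)  # downloads page 0 only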
-
-
-class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
- IE_NAME = 'mixcloud:playlist'
-
- _TESTS = [{
- 'url': 'https://www.mixcloud.com/RedBullThre3style/playlists/tokyo-finalists-2015/',
- 'info_dict': {
- 'id': 'RedBullThre3style_tokyo-finalists-2015',
- 'title': 'National Champions 2015',
- 'description': 'md5:6ff5fb01ac76a31abc9b3939c16243a3',
- },
- 'playlist_mincount': 16,
- }, {
- 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user_id = mobj.group('user')
- playlist_id = mobj.group('playlist')
- video_id = '%s_%s' % (user_id, playlist_id)
-
- webpage = self._download_webpage(
- url, user_id,
- note='Downloading playlist page',
- errnote='Unable to download playlist page')
-
- title = self._html_search_regex(
- r'<a[^>]+class="parent active"[^>]*><b>\d+</b><span[^>]*>([^<]+)',
- webpage, 'playlist title',
- default=None) or self._og_search_title(webpage, fatal=False)
- description = self._get_user_description(webpage)
-
- entries = OnDemandPagedList(
- functools.partial(
- self._tracks_page_func,
- '%s/playlists/%s' % (user_id, playlist_id), video_id, 'tracklist'),
- self._PAGE_SIZE)
-
- return self.playlist_result(entries, video_id, title, description)
-
-
-class MixcloudStreamIE(MixcloudPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/stream/?$'
- IE_NAME = 'mixcloud:stream'
-
- _TEST = {
- 'url': 'https://www.mixcloud.com/FirstEar/stream/',
- 'info_dict': {
- 'id': 'FirstEar',
- 'title': 'First Ear',
- 'description': 'Curators of good music\nfirstearmusic.com',
- },
- 'playlist_mincount': 192,
- }
-
- def _real_extract(self, url):
- user_id = self._match_id(url)
-
- webpage = self._download_webpage(url, user_id)
-
- entries = []
- prev_page_url = None
-
- def _handle_page(page):
- entries.extend(self._find_urls_in_page(page))
- return self._search_regex(
- r'm-next-page-url="([^"]+)"', page,
- 'next page URL', default=None)
-
- next_page_url = _handle_page(webpage)
-
- for idx in itertools.count(0):
- if not next_page_url or prev_page_url == next_page_url:
- break
-
- prev_page_url = next_page_url
- current_page = int(self._search_regex(
- r'\?page=(\d+)', next_page_url, 'next page number'))
-
- next_page_url = _handle_page(self._fetch_tracks_page(
- '%s/stream' % user_id, user_id, 'stream', idx,
- real_page_number=current_page))
-
- username = self._og_search_title(webpage)
- description = self._get_user_description(webpage)
-
- return self.playlist_result(entries, user_id, username, description)
diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py
deleted file mode 100644
index 1c652813a..000000000
--- a/youtube_dl/extractor/mofosex.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from __future__ import unicode_literals
-
-from ..utils import (
- int_or_none,
- str_to_int,
- unified_strdate,
-)
-from .keezmovies import KeezMoviesIE
-
-
-class MofosexIE(KeezMoviesIE):
- _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
- _TESTS = [{
- 'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
- 'md5': '558fcdafbb63a87c019218d6e49daf8a',
- 'info_dict': {
- 'id': '318131',
- 'display_id': 'amateur-teen-playing-and-masturbating-318131',
- 'ext': 'mp4',
- 'title': 'amateur teen playing and masturbating',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20121114',
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'age_limit': 18,
- }
- }, {
- # This video is no longer available
- 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- webpage, info = self._extract_info(url)
-
- view_count = str_to_int(self._search_regex(
- r'VIEWS:</span>\s*([\d,.]+)', webpage, 'view count', fatal=False))
- like_count = int_or_none(self._search_regex(
- r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage,
- 'like count', fatal=False))
-        dislike_count = int_or_none(self._search_regex(
-            r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage,
-            'dislike count', fatal=False))
- upload_date = unified_strdate(self._html_search_regex(
- r'Added:</span>([^<]+)', webpage, 'upload date', fatal=False))
-
- info.update({
- 'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'upload_date': upload_date,
- 'thumbnail': self._og_search_thumbnail(webpage),
- })
-
- return info
diff --git a/youtube_dl/extractor/motherless.py b/youtube_dl/extractor/motherless.py
deleted file mode 100644
index 43fd70f11..000000000
--- a/youtube_dl/extractor/motherless.py
+++ /dev/null
@@ -1,205 +0,0 @@
-from __future__ import unicode_literals
-
-import datetime
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- ExtractorError,
- InAdvancePagedList,
- orderedSet,
- str_to_int,
- unified_strdate,
-)
-
-
-class MotherlessIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
- _TESTS = [{
- 'url': 'http://motherless.com/AC3FFE1',
- 'md5': '310f62e325a9fafe64f68c0bccb6e75f',
- 'info_dict': {
- 'id': 'AC3FFE1',
- 'ext': 'mp4',
- 'title': 'Fucked in the ass while playing PS3',
- 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
- 'upload_date': '20100913',
- 'uploader_id': 'famouslyfuckedup',
- 'thumbnail': r're:http://.*\.jpg',
- 'age_limit': 18,
- }
- }, {
- 'url': 'http://motherless.com/532291B',
- 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
- 'info_dict': {
- 'id': '532291B',
- 'ext': 'mp4',
- 'title': 'Amazing girl playing the omegle game, PERFECT!',
- 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
- 'game', 'hairy'],
- 'upload_date': '20140622',
- 'uploader_id': 'Sulivana7x',
- 'thumbnail': r're:http://.*\.jpg',
- 'age_limit': 18,
- },
- 'skip': '404',
- }, {
- 'url': 'http://motherless.com/g/cosplay/633979F',
- 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
- 'info_dict': {
- 'id': '633979F',
- 'ext': 'mp4',
- 'title': 'Turtlette',
- 'categories': ['superheroine heroine superher'],
- 'upload_date': '20140827',
- 'uploader_id': 'shade0230',
- 'thumbnail': r're:http://.*\.jpg',
- 'age_limit': 18,
- }
- }, {
- # no keywords
- 'url': 'http://motherless.com/8B4BBC1',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- if any(p in webpage for p in (
- '<title>404 - MOTHERLESS.COM<',
- ">The page you're looking for cannot be found.<")):
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- if '>The content you are trying to view is for friends only.' in webpage:
- raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
-
- title = self._html_search_regex(
- r'id="view-upload-title">\s+([^<]+)<', webpage, 'title')
- video_url = (self._html_search_regex(
- (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
- r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
- webpage, 'video URL', default=None, group='url')
- or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
- age_limit = self._rta_search(webpage)
- view_count = str_to_int(self._html_search_regex(
- r'<strong>Views</strong>\s+([^<]+)<',
- webpage, 'view count', fatal=False))
- like_count = str_to_int(self._html_search_regex(
- r'<strong>Favorited</strong>\s+([^<]+)<',
- webpage, 'like count', fatal=False))
-
- upload_date = self._html_search_regex(
- r'<strong>Uploaded</strong>\s+([^<]+)<', webpage, 'upload date')
- if 'Ago' in upload_date:
- days = int(re.search(r'([0-9]+)', upload_date).group(1))
- upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
- else:
- upload_date = unified_strdate(upload_date)
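# Editorial example (not in the original source): recent uploads carry a
# relative date, so the branch above rewrites it against the current clock:
#   '3 days Ago'  -> today minus 3 days, formatted as YYYYMMDD
#   '22 Nov 2014' -> unified_strdate() -> '20141122'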
-
- comment_count = webpage.count('class="media-comment-contents"')
- uploader_id = self._html_search_regex(
- r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
- webpage, 'uploader_id')
-
- categories = self._html_search_meta('keywords', webpage, default=None)
- if categories:
- categories = [cat.strip() for cat in categories.split(',')]
-
- return {
- 'id': video_id,
- 'title': title,
- 'upload_date': upload_date,
- 'uploader_id': uploader_id,
- 'thumbnail': self._og_search_thumbnail(webpage),
- 'categories': categories,
- 'view_count': view_count,
- 'like_count': like_count,
- 'comment_count': comment_count,
- 'age_limit': age_limit,
- 'url': video_url,
- }
-
-
-class MotherlessGroupIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
- _TESTS = [{
- 'url': 'http://motherless.com/g/movie_scenes',
- 'info_dict': {
- 'id': 'movie_scenes',
- 'title': 'Movie Scenes',
- 'description': 'Hot and sexy scenes from "regular" movies... '
- 'Beautiful actresses fully nude... A looot of '
- 'skin! :)Enjoy!',
- },
- 'playlist_mincount': 662,
- }, {
- 'url': 'http://motherless.com/gv/sex_must_be_funny',
- 'info_dict': {
- 'id': 'sex_must_be_funny',
- 'title': 'Sex must be funny',
- 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
- 'any kind!'
- },
- 'playlist_mincount': 9,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (False if MotherlessIE.suitable(url)
- else super(MotherlessGroupIE, cls).suitable(url))
-
- def _extract_entries(self, webpage, base):
- entries = []
- for mobj in re.finditer(
- r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
- webpage):
- video_url = compat_urlparse.urljoin(base, mobj.group('href'))
- if not MotherlessIE.suitable(video_url):
- continue
- video_id = MotherlessIE._match_id(video_url)
- title = mobj.group('title')
- entries.append(self.url_result(
- video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
- video_title=title))
- # Alternative fallback
- if not entries:
- entries = [
- self.url_result(
- compat_urlparse.urljoin(base, '/' + entry_id),
- ie=MotherlessIE.ie_key(), video_id=entry_id)
- for entry_id in orderedSet(re.findall(
- r'data-codename=["\']([A-Z0-9]+)', webpage))]
- return entries
-
- def _real_extract(self, url):
- group_id = self._match_id(url)
- page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
- webpage = self._download_webpage(page_url, group_id)
- title = self._search_regex(
- r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
- description = self._html_search_meta(
- 'description', webpage, fatal=False)
- page_count = self._int(self._search_regex(
- r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
- webpage, 'page_count'), 'page_count')
- PAGE_SIZE = 80
-
- def _get_page(idx):
- webpage = self._download_webpage(
- page_url, group_id, query={'page': idx + 1},
- note='Downloading page %d/%d' % (idx + 1, page_count)
- )
- for entry in self._extract_entries(webpage, url):
- yield entry
-
- playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
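# Editorial note (not in the original source): InAdvancePagedList is handed
# the total page_count up front (parsed from the pagination widget above),
# so the playlist's overall size is known before any video page is fetched.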
-
- return {
- '_type': 'playlist',
- 'id': group_id,
- 'title': title,
- 'description': description,
- 'entries': playlist
- }
diff --git a/youtube_dl/extractor/msn.py b/youtube_dl/extractor/msn.py
deleted file mode 100644
index 0460cf4d5..000000000
--- a/youtube_dl/extractor/msn.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- unescapeHTML,
-)
-
-
-class MSNIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
- _TESTS = [{
- 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/criminal-minds-shemar-moore-shares-a-touching-goodbye-message/vp-BBqQYNE',
- 'md5': '8442f66c116cbab1ff7098f986983458',
- 'info_dict': {
- 'id': 'BBqQYNE',
- 'display_id': 'criminal-minds-shemar-moore-shares-a-touching-goodbye-message',
- 'ext': 'mp4',
- 'title': 'Criminal Minds - Shemar Moore Shares A Touching Goodbye Message',
- 'description': 'md5:e8e89b897b222eb33a6b5067a8f1bc25',
- 'duration': 104,
- 'uploader': 'CBS Entertainment',
- 'uploader_id': 'IT0X5aoJ6bJgYerJXSDCgFmYPB1__54v',
- },
- }, {
- 'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
- 'only_matching': True,
- }, {
- 'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH',
- 'only_matching': True,
- }, {
- # geo restricted
- 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU',
- 'only_matching': True,
- }, {
- 'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id, display_id = mobj.group('id', 'display_id')
-
- webpage = self._download_webpage(url, display_id)
-
- video = self._parse_json(
- self._search_regex(
- r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1',
- webpage, 'video data', default='{}', group='data'),
- display_id, transform_source=unescapeHTML)
-
- if not video:
- error = unescapeHTML(self._search_regex(
- r'data-error=(["\'])(?P<error>.+?)\1',
- webpage, 'error', group='error'))
- raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
-
- title = video['title']
-
- formats = []
- for file_ in video.get('videoFiles', []):
- format_url = file_.get('url')
- if not format_url:
- continue
- if 'm3u8' in format_url:
- # m3u8_native should not be used here until
- # https://github.com/ytdl-org/youtube-dl/issues/9913 is fixed
- m3u8_formats = self._extract_m3u8_formats(
- format_url, display_id, 'mp4',
- m3u8_id='hls', fatal=False)
- formats.extend(m3u8_formats)
- elif determine_ext(format_url) == 'ism':
- formats.extend(self._extract_ism_formats(
- format_url + '/Manifest', display_id, 'mss', fatal=False))
- else:
- formats.append({
- 'url': format_url,
- 'ext': 'mp4',
- 'format_id': 'http',
- 'width': int_or_none(file_.get('width')),
- 'height': int_or_none(file_.get('height')),
- })
- self._sort_formats(formats)
-
- subtitles = {}
- for file_ in video.get('files', []):
- format_url = file_.get('url')
- format_code = file_.get('formatCode')
- if not format_url or not format_code:
- continue
- if compat_str(format_code) == '3100':
- subtitles.setdefault(file_.get('culture', 'en'), []).append({
- 'ext': determine_ext(format_url, 'ttml'),
- 'url': format_url,
- })
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': video.get('description'),
- 'thumbnail': video.get('headlineImage', {}).get('url'),
- 'duration': int_or_none(video.get('durationSecs')),
- 'uploader': video.get('sourceFriendly'),
- 'uploader_id': video.get('providerId'),
- 'creator': video.get('creator'),
- 'subtitles': subtitles,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py
deleted file mode 100644
index 7a3b57abd..000000000
--- a/youtube_dl/extractor/mtv.py
+++ /dev/null
@@ -1,490 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_xpath,
-)
-from ..utils import (
- ExtractorError,
- find_xpath_attr,
- fix_xml_ampersands,
- float_or_none,
- HEADRequest,
- RegexNotFoundError,
- sanitized_Request,
- strip_or_none,
- timeconvert,
- try_get,
- unescapeHTML,
- update_url_query,
- url_basename,
- xpath_text,
-)
-
-
-def _media_xml_tag(tag):
- return '{http://search.yahoo.com/mrss/}%s' % tag
-
-
-class MTVServicesInfoExtractor(InfoExtractor):
- _MOBILE_TEMPLATE = None
- _LANG = None
-
- @staticmethod
- def _id_from_uri(uri):
- return uri.split(':')[-1]
-
- @staticmethod
- def _remove_template_parameter(url):
- # Remove the templates, like &device={device}
- return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
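# Editorial example (not in the original source) of the regex above:
#   _remove_template_parameter('http://ex.invalid/f?uri=mgid:x&device={device}&ep={ep}')
#   -> 'http://ex.invalid/f?uri=mgid:x'
# ordinary parameters without {...} placeholders are left untouched.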
-
- def _get_feed_url(self, uri):
- return self._FEED_URL
-
- def _get_thumbnail_url(self, uri, itemdoc):
- search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
- thumb_node = itemdoc.find(search_path)
- if thumb_node is None:
- return None
- return thumb_node.get('url') or thumb_node.text or None
-
- def _extract_mobile_video_formats(self, mtvn_id):
- webpage_url = self._MOBILE_TEMPLATE % mtvn_id
- req = sanitized_Request(webpage_url)
- # Otherwise we get a webpage that would execute some javascript
- req.add_header('User-Agent', 'curl/7')
- webpage = self._download_webpage(req, mtvn_id,
- 'Downloading mobile page')
- metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
- req = HEADRequest(metrics_url)
- response = self._request_webpage(req, mtvn_id, 'Resolving url')
- url = response.geturl()
- # Transform the url to get the best quality:
- url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
- return [{'url': url, 'ext': 'mp4'}]
-
- def _extract_video_formats(self, mdoc, mtvn_id, video_id):
- if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
- if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
- self.to_screen('The normal version is not available from your '
- 'country, trying with the mobile version')
- return self._extract_mobile_video_formats(mtvn_id)
- raise ExtractorError('This video is not available from your country.',
- expected=True)
-
- formats = []
- for rendition in mdoc.findall('.//rendition'):
- if rendition.get('method') == 'hls':
- hls_url = rendition.find('./src').text
- formats.extend(self._extract_m3u8_formats(
- hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- # fms
- try:
- _, _, ext = rendition.attrib['type'].partition('/')
- rtmp_video_url = rendition.find('./src').text
- if 'error_not_available.swf' in rtmp_video_url:
- raise ExtractorError(
- '%s said: video is not available' % self.IE_NAME,
- expected=True)
- if rtmp_video_url.endswith('siteunavail.png'):
- continue
- formats.extend([{
- 'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext,
- 'url': rtmp_video_url,
- 'format_id': '-'.join(filter(None, [
- 'rtmp' if rtmp_video_url.startswith('rtmp') else None,
- rendition.get('bitrate')])),
- 'width': int(rendition.get('width')),
- 'height': int(rendition.get('height')),
- }])
- except (KeyError, TypeError):
- raise ExtractorError('Invalid rendition field.')
- if formats:
- self._sort_formats(formats)
- return formats
-
- def _extract_subtitles(self, mdoc, mtvn_id):
- subtitles = {}
- for transcript in mdoc.findall('.//transcript'):
- if transcript.get('kind') != 'captions':
- continue
- lang = transcript.get('srclang')
- for typographic in transcript.findall('./typographic'):
- sub_src = typographic.get('src')
- if not sub_src:
- continue
- ext = typographic.get('format')
- if ext == 'cea-608':
- ext = 'scc'
- subtitles.setdefault(lang, []).append({
- 'url': compat_str(sub_src),
- 'ext': ext
- })
- return subtitles
-
- def _get_video_info(self, itemdoc, use_hls=True):
- uri = itemdoc.find('guid').text
- video_id = self._id_from_uri(uri)
- self.report_extraction(video_id)
- content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
- mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
- mediagen_url = mediagen_url.replace('device={device}', '')
- if 'acceptMethods' not in mediagen_url:
- mediagen_url += '&' if '?' in mediagen_url else '?'
- mediagen_url += 'acceptMethods='
- mediagen_url += 'hls' if use_hls else 'fms'
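# Editorial example (not in the original source), with use_hls=True and a
# hypothetical feed URL:
#   'http://ex.invalid/g?uri=mgid:x&device={device}'
#   -> 'http://ex.invalid/g?uri=mgid:x&acceptMethods=hls'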
-
- mediagen_doc = self._download_xml(
- mediagen_url, video_id, 'Downloading video urls', fatal=False)
-
- if mediagen_doc is False:
- return None
-
- item = mediagen_doc.find('./video/item')
- if item is not None and item.get('type') == 'text':
- message = '%s returned error: ' % self.IE_NAME
- if item.get('code') is not None:
- message += '%s - ' % item.get('code')
- message += item.text
- raise ExtractorError(message, expected=True)
-
- description = strip_or_none(xpath_text(itemdoc, 'description'))
-
- timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))
-
-        title_el = find_xpath_attr(
-            itemdoc, './/{http://search.yahoo.com/mrss/}category',
-            'scheme', 'urn:mtvn:video_title')
- if title_el is None:
- title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title'))
- if title_el is None:
- title_el = itemdoc.find(compat_xpath('.//title'))
-            if title_el is not None and title_el.text is None:
-                title_el = None
-
-        title = title_el.text if title_el is not None else None
-        if title is None:
-            raise ExtractorError('Could not find video title')
-        title = title.strip()
-
-        # This is a short id that's used in the webpage urls
- mtvn_id = None
- mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
- 'scheme', 'urn:mtvn:id')
- if mtvn_id_node is not None:
- mtvn_id = mtvn_id_node.text
-
- formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
-
-        # Some parts of the complete video may be missing (e.g. missing Act 3 in
- # http://www.southpark.de/alle-episoden/s14e01-sexual-healing)
- if not formats:
- return None
-
- self._sort_formats(formats)
-
- return {
- 'title': title,
- 'formats': formats,
- 'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
- 'id': video_id,
- 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
- 'description': description,
- 'duration': float_or_none(content_el.attrib.get('duration')),
- 'timestamp': timestamp,
- }
-
- def _get_feed_query(self, uri):
- data = {'uri': uri}
- if self._LANG:
- data['lang'] = self._LANG
- return data
-
- def _get_videos_info(self, uri, use_hls=True):
- video_id = self._id_from_uri(uri)
- feed_url = self._get_feed_url(uri)
- info_url = update_url_query(feed_url, self._get_feed_query(uri))
- return self._get_videos_info_from_url(info_url, video_id, use_hls)
-
- def _get_videos_info_from_url(self, url, video_id, use_hls=True):
- idoc = self._download_xml(
- url, video_id,
- 'Downloading info', transform_source=fix_xml_ampersands)
-
- title = xpath_text(idoc, './channel/title')
- description = xpath_text(idoc, './channel/description')
-
- entries = []
- for item in idoc.findall('.//item'):
- info = self._get_video_info(item, use_hls)
- if info:
- entries.append(info)
-
- return self.playlist_result(
- entries, playlist_title=title, playlist_description=description)
-
- def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
- triforce_feed = self._parse_json(self._search_regex(
- r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
- 'triforce feed', default='{}'), video_id, fatal=False)
-
- data_zone = self._search_regex(
- r'data-zone=(["\'])(?P<zone>.+?_lc_promo.*?)\1', webpage,
- 'data zone', default=data_zone, group='zone')
-
- feed_url = try_get(
- triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'],
- compat_str)
- if not feed_url:
- return
-
- feed = self._download_json(feed_url, video_id, fatal=False)
- if not feed:
- return
-
- return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
-
- def _extract_mgid(self, webpage):
- try:
- # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
- # or http://media.mtvnservices.com/{mgid}
- og_url = self._og_search_video_url(webpage)
- mgid = url_basename(og_url)
- if mgid.endswith('.swf'):
- mgid = mgid[:-4]
- except RegexNotFoundError:
- mgid = None
-
- if mgid is None or ':' not in mgid:
- mgid = self._search_regex(
- [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
- webpage, 'mgid', default=None)
-
- if not mgid:
- sm4_embed = self._html_search_meta(
- 'sm4:video:embed', webpage, 'sm4 embed', default='')
- mgid = self._search_regex(
- r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
-
- if not mgid:
- mgid = self._extract_triforce_mgid(webpage)
-
- return mgid
-
- def _real_extract(self, url):
- title = url_basename(url)
- webpage = self._download_webpage(url, title)
- mgid = self._extract_mgid(webpage)
- videos_info = self._get_videos_info(mgid)
- return videos_info
-
-
-class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
- IE_NAME = 'mtvservices:embedded'
- _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
-
- _TEST = {
- # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
- 'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
- 'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
- 'info_dict': {
- 'id': '1043906',
- 'ext': 'mp4',
- 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
- 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
- 'timestamp': 1400126400,
- 'upload_date': '20140515',
- },
- }
-
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
- if mobj:
- return mobj.group('url')
-
- def _get_feed_url(self, uri):
- video_id = self._id_from_uri(uri)
- config = self._download_json(
- 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
- return self._remove_template_parameter(config['feedWithQueryParams'])
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- mgid = mobj.group('mgid')
- return self._get_videos_info(mgid)
-
-
-class MTVIE(MTVServicesInfoExtractor):
- IE_NAME = 'mtv'
- _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
- _FEED_URL = 'http://www.mtv.com/feeds/mrss/'
-
- _TESTS = [{
- 'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer',
- 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
- 'info_dict': {
- 'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
- 'ext': 'mp4',
- 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
- 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
- 'timestamp': 1468846800,
- 'upload_date': '20160718',
- },
- }, {
- 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
- 'only_matching': True,
- }, {
- 'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713',
- 'only_matching': True,
- }]
-
-
-class MTV81IE(InfoExtractor):
- IE_NAME = 'mtv81'
- _VALID_URL = r'https?://(?:www\.)?mtv81\.com/videos/(?P<id>[^/?#.]+)'
-
- _TEST = {
- 'url': 'http://www.mtv81.com/videos/artist-to-watch/the-godfather-of-japanese-hip-hop-segment-1/',
- 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
- 'info_dict': {
- 'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
- 'ext': 'mp4',
- 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
- 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
- 'timestamp': 1468846800,
- 'upload_date': '20160718',
- },
- }
-
- def _extract_mgid(self, webpage):
- return self._search_regex(
- r'getTheVideo\((["\'])(?P<id>mgid:.+?)\1', webpage,
- 'mgid', group='id')
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- mgid = self._extract_mgid(webpage)
- return self.url_result('http://media.mtvnservices.com/embed/%s' % mgid)
-
-
-class MTVVideoIE(MTVServicesInfoExtractor):
- IE_NAME = 'mtv:video'
- _VALID_URL = r'''(?x)^https?://
- (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
- m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''
-
- _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
-
- _TESTS = [
- {
- 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
- 'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
- 'info_dict': {
- 'id': '853555',
- 'ext': 'mp4',
- 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
- 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
- 'timestamp': 1352610000,
- 'upload_date': '20121111',
- },
- },
- ]
-
- def _get_thumbnail_url(self, uri, itemdoc):
- return 'http://mtv.mtvnimages.com/uri/' + uri
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('videoid')
- uri = mobj.groupdict().get('mgid')
- if uri is None:
- webpage = self._download_webpage(url, video_id)
-
- # Some videos come from Vevo.com
- m_vevo = re.search(
- r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
- if m_vevo:
- vevo_id = m_vevo.group(1)
- self.to_screen('Vevo video detected: %s' % vevo_id)
- return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
-
- uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
- return self._get_videos_info(uri)
-
-
-class MTVDEIE(MTVServicesInfoExtractor):
- IE_NAME = 'mtv.de'
- _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:artists|shows|news)/(?:[^/]+/)*(?P<id>\d+)-[^/#?]+/*(?:[#?].*)?$'
- _TESTS = [{
- 'url': 'http://www.mtv.de/artists/10571-cro/videos/61131-traum',
- 'info_dict': {
- 'id': 'music_video-a50bc5f0b3aa4b3190aa',
- 'ext': 'flv',
- 'title': 'MusicVideo_cro-traum',
- 'description': 'Cro - Traum',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Blocked at Travis CI',
- }, {
- # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
- 'url': 'http://www.mtv.de/shows/933-teen-mom-2/staffeln/5353/folgen/63565-enthullungen',
- 'info_dict': {
- 'id': 'local_playlist-f5ae778b9832cc837189',
- 'ext': 'flv',
- 'title': 'Episode_teen-mom-2_shows_season-5_episode-1_full-episode_part1',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Blocked at Travis CI',
- }, {
- 'url': 'http://www.mtv.de/news/77491-mtv-movies-spotlight-pixels-teil-3',
- 'info_dict': {
- 'id': 'local_playlist-4e760566473c4c8c5344',
- 'ext': 'mp4',
- 'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1',
- 'description': 'MTV Movies Supercut',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- playlist = self._parse_json(
- self._search_regex(
- r'window\.pagePlaylist\s*=\s*(\[.+?\]);\n', webpage, 'page playlist'),
- video_id)
-
- def _mrss_url(item):
- return item['mrss'] + item.get('mrssvars', '')
-
-        # news pages contain a single video in the playlist, with a different id
- if len(playlist) == 1:
- return self._get_videos_info_from_url(_mrss_url(playlist[0]), video_id)
-
- for item in playlist:
- item_id = item.get('id')
- if item_id and compat_str(item_id) == video_id:
- return self._get_videos_info_from_url(_mrss_url(item), video_id)
diff --git a/youtube_dl/extractor/musicplayon.py b/youtube_dl/extractor/musicplayon.py
deleted file mode 100644
index 1854d59a5..000000000
--- a/youtube_dl/extractor/musicplayon.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- int_or_none,
- js_to_json,
- mimetype2ext,
-)
-
-
-class MusicPlayOnIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.+?\.)?musicplayon\.com/play(?:-touch)?\?(?:v|pl=\d+&play)=(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://en.musicplayon.com/play?v=433377',
- 'md5': '00cdcdea1726abdf500d1e7fd6dd59bb',
- 'info_dict': {
- 'id': '433377',
- 'ext': 'mp4',
- 'title': 'Rick Ross - Interview On Chelsea Lately (2014)',
- 'description': 'Rick Ross Interview On Chelsea Lately',
- 'duration': 342,
- 'uploader': 'ultrafish',
- },
- }, {
- 'url': 'http://en.musicplayon.com/play?pl=102&play=442629',
- 'only_matching': True,
- }]
-
- _URL_TEMPLATE = 'http://en.musicplayon.com/play?v=%s'
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- url = self._URL_TEMPLATE % video_id
-
- page = self._download_webpage(url, video_id)
-
- title = self._og_search_title(page)
- description = self._og_search_description(page)
- thumbnail = self._og_search_thumbnail(page)
- duration = self._html_search_meta('video:duration', page, 'duration', fatal=False)
- view_count = self._og_search_property('count', page, fatal=False)
- uploader = self._html_search_regex(
- r'<div>by&nbsp;<a href="[^"]+" class="purple">([^<]+)</a></div>', page, 'uploader', fatal=False)
-
- sources = self._parse_json(
- self._search_regex(r'setup\[\'_sources\'\]\s*=\s*([^;]+);', page, 'video sources'),
- video_id, transform_source=js_to_json)
- formats = [{
- 'url': compat_urlparse.urljoin(url, source['src']),
- 'ext': mimetype2ext(source.get('type')),
- 'format_note': source.get('data-res'),
- } for source in sources]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'duration': int_or_none(duration),
- 'view_count': int_or_none(view_count),
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py
deleted file mode 100644
index 2afe535b5..000000000
--- a/youtube_dl/extractor/myspass.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from __future__ import unicode_literals
-import os.path
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlparse,
-)
-from ..utils import (
- ExtractorError,
-)
-
-
-class MySpassIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?myspass\.de/.*'
- _TEST = {
- 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
- 'md5': '0b49f4844a068f8b33f4b7c88405862b',
- 'info_dict': {
- 'id': '11741',
- 'ext': 'mp4',
- 'description': 'Wer kann in die Fu\u00dfstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
- 'title': 'Absolute Mehrheit vom 17.02.2013 - Die Highlights, Teil 2',
- },
- }
-
- def _real_extract(self, url):
- META_DATA_URL_TEMPLATE = 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=%s'
-
- # video id is the last path element of the URL
-        # usually there is a trailing slash, so also try the second-to-last
- url_path = compat_urllib_parse_urlparse(url).path
- url_parent_path, video_id = os.path.split(url_path)
- if not video_id:
- _, video_id = os.path.split(url_parent_path)
-
- # get metadata
- metadata_url = META_DATA_URL_TEMPLATE % video_id
- metadata = self._download_xml(
- metadata_url, video_id, transform_source=lambda s: s.strip())
-
- # extract values from metadata
- url_flv_el = metadata.find('url_flv')
- if url_flv_el is None:
- raise ExtractorError('Unable to extract download url')
- video_url = url_flv_el.text
- title_el = metadata.find('title')
- if title_el is None:
- raise ExtractorError('Unable to extract title')
- title = title_el.text
-        format_id_el = metadata.find('format_id')
-        video_format = format_id_el.text if format_id_el is not None else 'mp4'
-        description_el = metadata.find('description')
-        description = description_el.text if description_el is not None else None
-        imagePreview_el = metadata.find('imagePreview')
-        thumbnail = imagePreview_el.text if imagePreview_el is not None else None
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
-            'format': video_format,
- 'thumbnail': thumbnail,
- 'description': description,
- }
diff --git a/youtube_dl/extractor/naver.py b/youtube_dl/extractor/naver.py
deleted file mode 100644
index bb3d94413..000000000
--- a/youtube_dl/extractor/naver.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- update_url_query,
-)
-
-
-class NaverIE(InfoExtractor):
- _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/v/(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://tv.naver.com/v/81652',
- 'info_dict': {
- 'id': '81652',
- 'ext': 'mp4',
- 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
- 'description': '합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
- 'upload_date': '20130903',
- },
- }, {
- 'url': 'http://tv.naver.com/v/395837',
- 'md5': '638ed4c12012c458fefcddfd01f173cd',
- 'info_dict': {
- 'id': '395837',
- 'ext': 'mp4',
- 'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
- 'description': 'md5:5bf200dcbf4b66eb1b350d1eb9c753f7',
- 'upload_date': '20150519',
- },
- 'skip': 'Georestricted',
- }, {
- 'url': 'http://tvcast.naver.com/v/81652',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- vid = self._search_regex(
- r'videoId["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
-            'video id', default=None, group='value')
- in_key = self._search_regex(
- r'inKey["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'key', default=None, group='value')
-
- if not vid or not in_key:
- error = self._html_search_regex(
- r'(?s)<div class="(?:nation_error|nation_box|error_box)">\s*(?:<!--.*?-->)?\s*<p class="[^"]+">(?P<msg>.+?)</p>\s*</div>',
- webpage, 'error', default=None)
- if error:
- raise ExtractorError(error, expected=True)
- raise ExtractorError('couldn\'t extract vid and key')
- video_data = self._download_json(
- 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
- video_id, query={
- 'key': in_key,
- })
- meta = video_data['meta']
- title = meta['subject']
- formats = []
-
- def extract_formats(streams, stream_type, query={}):
- for stream in streams:
- stream_url = stream.get('source')
- if not stream_url:
- continue
- stream_url = update_url_query(stream_url, query)
- encoding_option = stream.get('encodingOption', {})
- bitrate = stream.get('bitrate', {})
- formats.append({
- 'format_id': '%s_%s' % (stream.get('type') or stream_type, encoding_option.get('id') or encoding_option.get('name')),
- 'url': stream_url,
- 'width': int_or_none(encoding_option.get('width')),
- 'height': int_or_none(encoding_option.get('height')),
- 'vbr': int_or_none(bitrate.get('video')),
- 'abr': int_or_none(bitrate.get('audio')),
- 'filesize': int_or_none(stream.get('size')),
- 'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
- })
-
- extract_formats(video_data.get('videos', {}).get('list', []), 'H264')
- for stream_set in video_data.get('streams', []):
- query = {}
- for param in stream_set.get('keys', []):
- query[param['name']] = param['value']
- stream_type = stream_set.get('type')
- videos = stream_set.get('videos')
- if videos:
- extract_formats(videos, stream_type, query)
- elif stream_type == 'HLS':
- stream_url = stream_set.get('source')
- if not stream_url:
- continue
- formats.extend(self._extract_m3u8_formats(
- update_url_query(stream_url, query), video_id,
- 'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
- self._sort_formats(formats)
-
- subtitles = {}
- for caption in video_data.get('captions', {}).get('list', []):
- caption_url = caption.get('source')
- if not caption_url:
- continue
- subtitles.setdefault(caption.get('language') or caption.get('locale'), []).append({
- 'url': caption_url,
- })
-
- upload_date = self._search_regex(
- r'<span[^>]+class="date".*?(\d{4}\.\d{2}\.\d{2})',
- webpage, 'upload date', fatal=False)
- if upload_date:
- upload_date = upload_date.replace('.', '')
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'subtitles': subtitles,
- 'description': self._og_search_description(webpage),
- 'thumbnail': meta.get('cover', {}).get('source') or self._og_search_thumbnail(webpage),
- 'view_count': int_or_none(meta.get('count')),
- 'upload_date': upload_date,
- }
diff --git a/youtube_dl/extractor/nbc.py b/youtube_dl/extractor/nbc.py
deleted file mode 100644
index 10680b202..000000000
--- a/youtube_dl/extractor/nbc.py
+++ /dev/null
@@ -1,482 +0,0 @@
-from __future__ import unicode_literals
-
-import base64
-import json
-import re
-
-from .common import InfoExtractor
-from .theplatform import ThePlatformIE
-from .adobepass import AdobePassIE
-from ..compat import compat_urllib_parse_unquote
-from ..utils import (
- smuggle_url,
- update_url_query,
- int_or_none,
-)
-
-
-class NBCIE(AdobePassIE):
- _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
-
- _TESTS = [
- {
- 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
- 'info_dict': {
- 'id': '2848237',
- 'ext': 'mp4',
- 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
- 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
- 'timestamp': 1424246400,
- 'upload_date': '20150218',
- 'uploader': 'NBCU-COM',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
- 'info_dict': {
- 'id': '2832821',
- 'ext': 'mp4',
- 'title': 'Star Wars Teaser',
- 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
- 'timestamp': 1417852800,
- 'upload_date': '20141206',
- 'uploader': 'NBCU-COM',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'Only works from US',
- },
- {
-            # HLS streams require the 'hdnea3' cookie
- 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
- 'info_dict': {
- 'id': '101528f5a9e8127b107e98c5e6ce4638',
- 'ext': 'mp4',
- 'title': 'Goliath',
- 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
- 'timestamp': 1237100400,
- 'upload_date': '20090315',
- 'uploader': 'NBCU-COM',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Only works from US',
- },
- {
- 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
- 'only_matching': True,
- },
- {
- # Percent escaped url
- 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
- 'only_matching': True,
- }
- ]
-
- def _real_extract(self, url):
- permalink, video_id = re.match(self._VALID_URL, url).groups()
- permalink = 'http' + compat_urllib_parse_unquote(permalink)
- response = self._download_json(
- 'https://friendship.nbc.co/v2/graphql', video_id, query={
- 'query': '''{
- page(name: "%s", platform: web, type: VIDEO, userId: "0") {
- data {
- ... on VideoPageData {
- description
- episodeNumber
- keywords
- locked
- mpxAccountId
- mpxGuid
- rating
- seasonNumber
- secondaryTitle
- seriesShortTitle
- }
- }
- }
-}''' % permalink,
- })
- video_data = response['data']['page']['data']
- query = {
- 'mbr': 'true',
- 'manifest': 'm3u',
- }
- video_id = video_data['mpxGuid']
- title = video_data['secondaryTitle']
- if video_data.get('locked'):
- resource = self._get_mvpd_resource(
- 'nbcentertainment', title, video_id,
- video_data.get('rating'))
- query['auth'] = self._extract_mvpd_auth(
- url, video_id, 'nbcentertainment', resource)
- theplatform_url = smuggle_url(update_url_query(
- 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
- query), {'force_smil_url': True})
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'title': title,
- 'url': theplatform_url,
- 'description': video_data.get('description'),
- 'tags': video_data.get('keywords'),
- 'season_number': int_or_none(video_data.get('seasonNumber')),
- 'episode_number': int_or_none(video_data.get('episodeNumber')),
- 'episode': title,
- 'series': video_data.get('seriesShortTitle'),
- 'ie_key': 'ThePlatform',
- }
-
-
-class NBCSportsVPlayerIE(InfoExtractor):
- _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
-
- _TESTS = [{
- 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
- 'info_dict': {
- 'id': '9CsDKds0kvHI',
- 'ext': 'mp4',
- 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
- 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
- 'timestamp': 1426270238,
- 'upload_date': '20150313',
- 'uploader': 'NBCU-SPORTS',
- }
- }, {
- 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_url(webpage):
- iframe_m = re.search(
- r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
- if iframe_m:
- return iframe_m.group('url')
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- theplatform_url = self._og_search_video_url(webpage).replace(
- 'vplayer.nbcsports.com', 'player.theplatform.com')
- return self.url_result(theplatform_url, 'ThePlatform')
-
-
-class NBCSportsIE(InfoExtractor):
-    # The site's https certificate is invalid, but https URLs are matched anyway
- _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
-
- _TEST = {
- 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
- 'info_dict': {
- 'id': 'PHJSaFWbrTY9',
- 'ext': 'flv',
- 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
- 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
- 'uploader': 'NBCU-SPORTS',
- 'upload_date': '20150330',
- 'timestamp': 1427726529,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- return self.url_result(
- NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
-
-
-class NBCSportsStreamIE(AdobePassIE):
- _VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
- _TEST = {
- 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
- 'info_dict': {
- 'id': '206559',
- 'ext': 'mp4',
- 'title': 'Amgen Tour of California Women\'s Recap',
- 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'Requires Adobe Pass Authentication',
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- live_source = self._download_json(
- 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id,
- video_id)
- video_source = live_source['videoSources'][0]
- title = video_source['title']
- source_url = None
- for k in ('source', 'msl4source', 'iossource', 'hlsv4'):
- sk = k + 'Url'
- source_url = video_source.get(sk) or video_source.get(sk + 'Alt')
- if source_url:
- break
- else:
- source_url = video_source['ottStreamUrl']
- is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
- resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
- token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
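-        # exchange the Adobe Pass token for a signed, tokenized stream URL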
- tokenized_url = self._download_json(
- 'https://token.playmakerservices.com/cdn',
- video_id, data=json.dumps({
- 'requestorId': 'nbcsports',
- 'pid': video_id,
- 'application': 'NBCSports',
- 'version': 'v1',
- 'platform': 'desktop',
- 'cdn': 'akamai',
- 'url': video_source['sourceUrl'],
- 'token': base64.b64encode(token.encode()).decode(),
- 'resourceId': base64.b64encode(resource.encode()).decode(),
- }).encode())['tokenizedUrl']
- formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
- self._sort_formats(formats)
- return {
- 'id': video_id,
- 'title': self._live_title(title) if is_live else title,
- 'description': live_source.get('description'),
- 'formats': formats,
- 'is_live': is_live,
- }
-
-
-class CSNNEIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
-
- _TEST = {
- 'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
- 'info_dict': {
- 'id': 'yvBLLUgQ8WU0',
- 'ext': 'mp4',
- 'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
- 'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
- 'timestamp': 1459369979,
- 'upload_date': '20160330',
- 'uploader': 'NBCU-SPORTS',
- }
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- return {
- '_type': 'url_transparent',
- 'ie_key': 'ThePlatform',
- 'url': self._html_search_meta('twitter:player:stream', webpage),
- 'display_id': display_id,
- }
-
-
-class NBCNewsIE(ThePlatformIE):
- _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
-
- _TESTS = [
- {
- 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
- 'md5': 'af1adfa51312291a017720403826bb64',
- 'info_dict': {
- 'id': '269389891880',
- 'ext': 'mp4',
- 'title': 'How Twitter Reacted To The Snowden Interview',
- 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
- 'uploader': 'NBCU-NEWS',
- 'timestamp': 1401363060,
- 'upload_date': '20140529',
- },
- },
- {
- 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
- 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
- 'info_dict': {
- 'id': '529953347624',
- 'ext': 'mp4',
- 'title': 'FULL EPISODE: Family Business',
- 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
- },
- 'skip': 'This page is unavailable.',
- },
- {
- 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
- 'md5': '73135a2e0ef819107bbb55a5a9b2a802',
- 'info_dict': {
- 'id': '394064451844',
- 'ext': 'mp4',
- 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
- 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
- 'timestamp': 1423104900,
- 'uploader': 'NBCU-NEWS',
- 'upload_date': '20150205',
- },
- },
- {
- 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
- 'md5': 'a49e173825e5fcd15c13fc297fced39d',
- 'info_dict': {
- 'id': '529953347624',
- 'ext': 'mp4',
- 'title': 'Volkswagen U.S. Chief:\xa0 We Have Totally Screwed Up',
- 'description': 'md5:c8be487b2d80ff0594c005add88d8351',
- 'upload_date': '20150922',
- 'timestamp': 1442917800,
- 'uploader': 'NBCU-NEWS',
- },
- },
- {
- 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
- 'md5': '118d7ca3f0bea6534f119c68ef539f71',
- 'info_dict': {
- 'id': '669831235788',
- 'ext': 'mp4',
- 'title': 'See the aurora borealis from space in stunning new NASA video',
- 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
- 'upload_date': '20160420',
- 'timestamp': 1461152093,
- 'uploader': 'NBCU-NEWS',
- },
- },
- {
- 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
- 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
- 'info_dict': {
- 'id': '314487875924',
- 'ext': 'mp4',
- 'title': 'The chaotic GOP immigration vote',
- 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1406937606,
- 'upload_date': '20140802',
- 'uploader': 'NBCU-NEWS',
- },
- },
- {
- 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
- 'only_matching': True,
- },
- {
- # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
- 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
- 'only_matching': True,
- },
- ]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
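-        # non-numeric ids require resolving the real id from the page's bootstrap JSON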
- if not video_id.isdigit():
- webpage = self._download_webpage(url, video_id)
-
- data = self._parse_json(self._search_regex(
- r'window\.__data\s*=\s*({.+});', webpage,
- 'bootstrap json'), video_id)
- video_id = data['article']['content'][0]['primaryMedia']['video']['mpxMetadata']['id']
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- # http://feed.theplatform.com/f/2E2eJC/nbcnews also works
- 'url': update_url_query('http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews', {'byId': video_id}),
- 'ie_key': 'ThePlatformFeed',
- }
-
-
-class NBCOlympicsIE(InfoExtractor):
- IE_NAME = 'nbcolympics'
- _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
-
- _TEST = {
- # Geo-restricted to US
- 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
- 'md5': '54fecf846d05429fbaa18af557ee523a',
- 'info_dict': {
- 'id': 'WjTBzDXx5AUq',
- 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
- 'ext': 'mp4',
- 'title': 'Rose\'s son Leo was in tears after his dad won gold',
- 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
- 'timestamp': 1471274964,
- 'upload_date': '20160815',
- 'uploader': 'NBCU-SPORTS',
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- drupal_settings = self._parse_json(self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'drupal settings'), display_id)
-
- iframe_url = drupal_settings['vod']['iframe_url']
- theplatform_url = iframe_url.replace(
- 'vplayer.nbcolympics.com', 'player.theplatform.com')
-
- return {
- '_type': 'url_transparent',
- 'url': theplatform_url,
- 'ie_key': ThePlatformIE.ie_key(),
- 'display_id': display_id,
- }
-
-
-class NBCOlympicsStreamIE(AdobePassIE):
- IE_NAME = 'nbcolympics:stream'
- _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
- _TEST = {
- 'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
- 'info_dict': {
- 'id': '203493',
- 'ext': 'mp4',
- 'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
- _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
- resource = self._search_regex(
- r"resource\s*=\s*'(.+)';", webpage,
- 'resource').replace("' + pid + '", pid)
- event_config = self._download_json(
- self._DATA_URL_TEMPLATE % ('event_config', pid),
- pid)['eventConfig']
- title = self._live_title(event_config['eventTitle'])
- source_url = self._download_json(
- self._DATA_URL_TEMPLATE % ('live_sources', pid),
- pid)['videoSources'][0]['sourceUrl']
- media_token = self._extract_mvpd_auth(
- url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
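-        # have Adobe sign the stream URL with the media token before extracting formats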
- formats = self._extract_m3u8_formats(self._download_webpage(
- 'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
- 'cdn': 'akamai',
- 'mediaToken': base64.b64encode(media_token.encode()),
- 'resource': base64.b64encode(resource.encode()),
- 'url': source_url,
- }), pid, 'mp4')
- self._sort_formats(formats)
-
- return {
- 'id': pid,
- 'display_id': display_id,
- 'title': title,
- 'formats': formats,
- 'is_live': True,
- }
diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py
deleted file mode 100644
index aec2ea133..000000000
--- a/youtube_dl/extractor/ndr.py
+++ /dev/null
@@ -1,389 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- parse_iso8601,
- qualities,
-)
-
-
-class NDRBaseIE(InfoExtractor):
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
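-        # take the first non-empty capture group as the display id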
- display_id = next(group for group in mobj.groups() if group)
- webpage = self._download_webpage(url, display_id)
- return self._extract_embed(webpage, display_id)
-
-
-class NDRIE(NDRBaseIE):
- IE_NAME = 'ndr'
- IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
- _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
- _TESTS = [{
- # httpVideo, same content id
- 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
- 'md5': '6515bc255dc5c5f8c85bbc38e035a659',
- 'info_dict': {
- 'id': 'hafengeburtstag988',
- 'display_id': 'Party-Poette-und-Parade',
- 'ext': 'mp4',
- 'title': 'Party, Pötte und Parade',
- 'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
- 'uploader': 'ndrtv',
- 'timestamp': 1431108900,
- 'upload_date': '20150510',
- 'duration': 3498,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # httpVideo, different content id
- 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
- 'md5': '1043ff203eab307f0c51702ec49e9a71',
- 'info_dict': {
- 'id': 'osna272',
- 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
- 'ext': 'mp4',
- 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
- 'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
- 'uploader': 'ndrtv',
- 'timestamp': 1442059200,
- 'upload_date': '20150912',
- 'duration': 510,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # httpAudio, same content id
- 'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
- 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
- 'info_dict': {
- 'id': 'audio51535',
- 'display_id': 'La-Valette-entgeht-der-Hinrichtung',
- 'ext': 'mp3',
- 'title': 'La Valette entgeht der Hinrichtung',
- 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
- 'uploader': 'ndrinfo',
- 'timestamp': 1290626100,
- 'upload_date': '20140729',
- 'duration': 884,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
- 'only_matching': True,
- }]
-
- def _extract_embed(self, webpage, display_id):
- embed_url = self._html_search_meta(
- 'embedURL', webpage, 'embed URL', fatal=True)
- description = self._search_regex(
- r'<p[^>]+itemprop="description">([^<]+)</p>',
- webpage, 'description', default=None) or self._og_search_description(webpage)
- timestamp = parse_iso8601(
- self._search_regex(
- r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
- webpage, 'upload date', fatal=False))
- return {
- '_type': 'url_transparent',
- 'url': embed_url,
- 'display_id': display_id,
- 'description': description,
- 'timestamp': timestamp,
- }
-
-
-class NJoyIE(NDRBaseIE):
- IE_NAME = 'njoy'
- IE_DESC = 'N-JOY'
- _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
- _TESTS = [{
- # httpVideo, same content id
- 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
- 'md5': 'cb63be60cd6f9dd75218803146d8dc67',
- 'info_dict': {
- 'id': 'comedycontest2480',
- 'display_id': 'Benaissa-beim-NDR-Comedy-Contest',
- 'ext': 'mp4',
- 'title': 'Benaissa beim NDR Comedy Contest',
- 'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39',
- 'uploader': 'ndrtv',
- 'upload_date': '20141129',
- 'duration': 654,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # httpVideo, different content id
- 'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
- 'md5': '417660fffa90e6df2fda19f1b40a64d8',
- 'info_dict': {
- 'id': 'dockville882',
- 'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
- 'ext': 'mp4',
- 'title': '"Ich hab noch nie" mit Felix Jaehn',
- 'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
- 'uploader': 'njoy',
- 'upload_date': '20150822',
- 'duration': 211,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.n-joy.de/radio/webradio/morningshow209.html',
- 'only_matching': True,
- }]
-
- def _extract_embed(self, webpage, display_id):
- video_id = self._search_regex(
- r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
- description = self._search_regex(
- r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
- webpage, 'description', fatal=False)
- return {
- '_type': 'url_transparent',
- 'ie_key': 'NDREmbedBase',
- 'url': 'ndr:%s' % video_id,
- 'display_id': display_id,
- 'description': description,
- }
-
-
-class NDREmbedBaseIE(InfoExtractor):
- IE_NAME = 'ndr:embed:base'
- _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
- _TESTS = [{
- 'url': 'ndr:soundcheck3366',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ndr.de/soundcheck3366-ppjson.json',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id') or mobj.group('id_s')
-
- ppjson = self._download_json(
- 'http://www.ndr.de/%s-ppjson.json' % video_id, video_id)
-
- playlist = ppjson['playlist']
-
- formats = []
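-        # quality labels, ordered from lowest to highest preference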
- quality_key = qualities(('xs', 's', 'm', 'l', 'xl'))
-
- for format_id, f in playlist.items():
- src = f.get('src')
- if not src:
- continue
- ext = determine_ext(src, None)
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
- f4m_id='hds', fatal=False))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- src, video_id, 'mp4', m3u8_id='hls',
- entry_protocol='m3u8_native', fatal=False))
- else:
- quality = f.get('quality')
- ff = {
- 'url': src,
- 'format_id': quality or format_id,
- 'quality': quality_key(quality),
- }
- type_ = f.get('type')
- if type_ and type_.split('/')[0] == 'audio':
- ff['vcodec'] = 'none'
- ff['ext'] = ext or 'mp3'
- formats.append(ff)
- self._sort_formats(formats)
-
- config = playlist['config']
-
-        live = config.get('streamType') in ('httpVideoLive', 'httpAudioLive')
- title = config['title']
- if live:
- title = self._live_title(title)
- uploader = ppjson.get('config', {}).get('branding')
- upload_date = ppjson.get('config', {}).get('publicationDate')
- duration = int_or_none(config.get('duration'))
-
- thumbnails = [{
- 'id': thumbnail.get('quality') or thumbnail_id,
- 'url': thumbnail['src'],
- 'preference': quality_key(thumbnail.get('quality')),
- } for thumbnail_id, thumbnail in config.get('poster', {}).items() if thumbnail.get('src')]
-
- return {
- 'id': video_id,
- 'title': title,
- 'is_live': live,
- 'uploader': uploader if uploader != '-' else None,
- 'upload_date': upload_date[0:8] if upload_date else None,
- 'duration': duration,
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
-
-
-class NDREmbedIE(NDREmbedBaseIE):
- IE_NAME = 'ndr:embed'
- _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
- _TESTS = [{
- 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
- 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
- 'info_dict': {
- 'id': 'ndraktuell28488',
- 'ext': 'mp4',
- 'title': 'Norddeutschland begrüßt Flüchtlinge',
- 'is_live': False,
- 'uploader': 'ndrtv',
- 'upload_date': '20150907',
- 'duration': 132,
- },
- }, {
- 'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
- 'md5': '002085c44bae38802d94ae5802a36e78',
- 'info_dict': {
- 'id': 'soundcheck3366',
- 'ext': 'mp4',
- 'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen',
- 'is_live': False,
- 'uploader': 'ndr2',
- 'upload_date': '20150912',
- 'duration': 3554,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.ndr.de/info/audio51535-player.html',
- 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
- 'info_dict': {
- 'id': 'audio51535',
- 'ext': 'mp3',
- 'title': 'La Valette entgeht der Hinrichtung',
- 'is_live': False,
- 'uploader': 'ndrinfo',
- 'upload_date': '20140729',
- 'duration': 884,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html',
- 'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c',
- 'info_dict': {
- 'id': 'visite11010',
- 'ext': 'mp4',
- 'title': 'Visite - die ganze Sendung',
- 'is_live': False,
- 'uploader': 'ndrtv',
- 'upload_date': '20150902',
- 'duration': 3525,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # httpVideoLive
- 'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
- 'info_dict': {
- 'id': 'livestream217',
- 'ext': 'flv',
- 'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
- 'is_live': True,
- 'upload_date': '20150910',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ndr.de/fernsehen/doku952-player.html',
- 'only_matching': True,
- }]
-
-
-class NJoyEmbedIE(NDREmbedBaseIE):
- IE_NAME = 'njoy:embed'
- _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
- _TESTS = [{
- # httpVideo
- 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
- 'md5': '8483cbfe2320bd4d28a349d62d88bd74',
- 'info_dict': {
- 'id': 'doku948',
- 'ext': 'mp4',
- 'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
- 'is_live': False,
- 'upload_date': '20150807',
- 'duration': 1011,
- },
- }, {
- # httpAudio
- 'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
- 'md5': 'd989f80f28ac954430f7b8a48197188a',
- 'info_dict': {
- 'id': 'stefanrichter100',
- 'ext': 'mp3',
- 'title': 'Interview mit einem Augenzeugen',
- 'is_live': False,
- 'uploader': 'njoy',
- 'upload_date': '20150909',
- 'duration': 140,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # httpAudioLive, no explicit ext
- 'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
- 'info_dict': {
- 'id': 'webradioweltweit100',
- 'ext': 'mp3',
- 'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
- 'is_live': True,
- 'uploader': 'njoy',
- 'upload_date': '20150810',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html',
- 'only_matching': True,
- }, {
- 'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html',
- 'only_matching': True,
- }, {
- 'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html',
- 'only_matching': True,
- }]
diff --git a/youtube_dl/extractor/nexx.py b/youtube_dl/extractor/nexx.py
deleted file mode 100644
index f9aad83c4..000000000
--- a/youtube_dl/extractor/nexx.py
+++ /dev/null
@@ -1,451 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import hashlib
-import random
-import re
-import time
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- int_or_none,
- parse_duration,
- try_get,
- urlencode_postdata,
-)
-
-
-class NexxIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?:
- https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
- nexx:(?:(?P<domain_id_s>\d+):)?|
- https?://arc\.nexx\.cloud/api/video/
- )
- (?P<id>\d+)
- '''
- _TESTS = [{
- # movie
- 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
- 'md5': '31899fd683de49ad46f4ee67e53e83fe',
- 'info_dict': {
- 'id': '128907',
- 'ext': 'mp4',
- 'title': 'Stiftung Warentest',
- 'alt_title': 'Wie ein Test abläuft',
- 'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
- 'creator': 'SPIEGEL TV',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 2509,
- 'timestamp': 1384264416,
- 'upload_date': '20131112',
- },
- }, {
- # episode
- 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
- 'info_dict': {
- 'id': '247858',
- 'ext': 'mp4',
- 'title': 'Return of the Golden Child (OV)',
- 'description': 'md5:5d969537509a92b733de21bae249dc63',
- 'release_year': 2017,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1397,
- 'timestamp': 1495033267,
- 'upload_date': '20170517',
- 'episode_number': 2,
- 'season_number': 2,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'HTTP Error 404: Not Found',
- }, {
- # does not work via arc
- 'url': 'nexx:741:1269984',
- 'md5': 'c714b5b238b2958dc8d5642addba6886',
- 'info_dict': {
- 'id': '1269984',
- 'ext': 'mp4',
- 'title': '1 TAG ohne KLO... wortwörtlich! 😑',
- 'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 607,
- 'timestamp': 1518614955,
- 'upload_date': '20180214',
- },
- }, {
- # free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
- 'url': 'nexx:747:1533779',
- 'md5': '6bf6883912b82b7069fb86c2297e9893',
- 'info_dict': {
- 'id': '1533779',
- 'ext': 'mp4',
- 'title': 'Aufregung um ausgebrochene Raubtiere',
- 'alt_title': 'Eifel-Zoo',
- 'description': 'md5:f21375c91c74ad741dcb164c427999d2',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 111,
- 'timestamp': 1527874460,
- 'upload_date': '20180601',
- },
- }, {
- 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
- 'only_matching': True,
- }, {
- 'url': 'nexx:748:128907',
- 'only_matching': True,
- }, {
- 'url': 'nexx:128907',
- 'only_matching': True,
- }, {
- 'url': 'https://arc.nexx.cloud/api/video/128907.json',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_domain_id(webpage):
- mobj = re.search(
- r'<script\b[^>]+\bsrc=["\'](?:https?:)?//require\.nexx(?:\.cloud|cdn\.com)/(?P<id>\d+)',
- webpage)
- return mobj.group('id') if mobj else None
-
- @staticmethod
- def _extract_urls(webpage):
- # Reference:
- # 1. https://nx-s.akamaized.net/files/201510/44.pdf
-
- entries = []
-
- # JavaScript Integration
- domain_id = NexxIE._extract_domain_id(webpage)
- if domain_id:
- for video_id in re.findall(
- r'(?is)onPLAYReady.+?_play\.init\s*\(.+?\s*,\s*["\']?(\d+)',
- webpage):
- entries.append(
- 'https://api.nexx.cloud/v3/%s/videos/byid/%s'
- % (domain_id, video_id))
-
- # TODO: support more embed formats
-
- return entries
-
- @staticmethod
- def _extract_url(webpage):
- return NexxIE._extract_urls(webpage)[0]
-
- def _handle_error(self, response):
- status = int_or_none(try_get(
- response, lambda x: x['metadata']['status']) or 200)
- if 200 <= status < 300:
- return
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
- expected=True)
-
- def _call_api(self, domain_id, path, video_id, data=None, headers={}):
- headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
- result = self._download_json(
- 'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
- 'Downloading %s JSON' % path, data=urlencode_postdata(data),
- headers=headers)
- self._handle_error(result)
- return result['result']
-
- def _extract_free_formats(self, video, video_id):
- stream_data = video['streamdata']
- cdn = stream_data['cdnType']
- assert cdn == 'free'
-
- hash = video['general']['hash']
-
- ps = compat_str(stream_data['originalDomain'])
- if stream_data['applyFolderHierarchy'] == 1:
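-            # derive the folder hierarchy from the reversed, zero-padded video id (two digits per level)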
- s = ('%04d' % int(video_id))[::-1]
- ps += '/%s/%s' % (s[0:2], s[2:4])
- ps += '/%s/%s_' % (video_id, hash)
-
- t = 'http://%s' + ps
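-        # azureFileDistribution appears to be a comma-separated list of '<bitrate>:<width>x<height>' entries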
- fd = stream_data['azureFileDistribution'].split(',')
- cdn_provider = stream_data['cdnProvider']
-
- def p0(p):
- return '_%s' % p if stream_data['applyAzureStructure'] == 1 else ''
-
- formats = []
- if cdn_provider == 'ak':
- t += ','
- for i in fd:
- p = i.split(':')
- t += p[1] + p0(int(p[0])) + ','
- t += '.mp4.csmil/master.%s'
- elif cdn_provider == 'ce':
- k = t.split('/')
- h = k.pop()
- http_base = t = '/'.join(k)
- http_base = http_base % stream_data['cdnPathHTTP']
- t += '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
- for i in fd:
- p = i.split(':')
- tbr = int(p[0])
- filename = '%s%s%s.mp4' % (h, p[1], p0(tbr))
- f = {
- 'url': http_base + '/' + filename,
- 'format_id': '%s-http-%d' % (cdn, tbr),
- 'tbr': tbr,
- }
- width_height = p[1].split('x')
- if len(width_height) == 2:
- f.update({
- 'width': int_or_none(width_height[0]),
- 'height': int_or_none(width_height[1]),
- })
- formats.append(f)
- a = filename + ':%s' % (tbr * 1000)
- t += a + ','
- t = t[:-1] + '&audiostream=' + a.split(':')[0]
- else:
- assert False
-
- if cdn_provider == 'ce':
- formats.extend(self._extract_mpd_formats(
- t % (stream_data['cdnPathDASH'], 'mpd'), video_id,
- mpd_id='%s-dash' % cdn, fatal=False))
- formats.extend(self._extract_m3u8_formats(
- t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))
-
- return formats
-
- def _extract_azure_formats(self, video, video_id):
- stream_data = video['streamdata']
- cdn = stream_data['cdnType']
- assert cdn == 'azure'
-
- azure_locator = stream_data['azureLocator']
-
- def get_cdn_shield_base(shield_type='', static=False):
- for secure in ('', 's'):
- cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
- if cdn_shield:
- return 'http%s://%s' % (secure, cdn_shield)
- else:
- if 'fb' in stream_data['azureAccount']:
- prefix = 'df' if static else 'f'
- else:
- prefix = 'd' if static else 'p'
- account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
- return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
-
- language = video['general'].get('language_raw') or ''
-
- azure_stream_base = get_cdn_shield_base()
- is_ml = ',' in language
- azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
- azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
-
- protection_token = try_get(
- video, lambda x: x['protectiondata']['token'], compat_str)
- if protection_token:
- azure_manifest_url += '?hdnts=%s' % protection_token
-
- formats = self._extract_m3u8_formats(
- azure_manifest_url % '(format=m3u8-aapl)',
- video_id, 'mp4', 'm3u8_native',
- m3u8_id='%s-hls' % cdn, fatal=False)
- formats.extend(self._extract_mpd_formats(
- azure_manifest_url % '(format=mpd-time-csf)',
- video_id, mpd_id='%s-dash' % cdn, fatal=False))
- formats.extend(self._extract_ism_formats(
- azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
-
- azure_progressive_base = get_cdn_shield_base('Prog', True)
- azure_file_distribution = stream_data.get('azureFileDistribution')
- if azure_file_distribution:
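-            # entries use the same '<bitrate>:<width>x<height>' layout as in _extract_free_formats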
- fds = azure_file_distribution.split(',')
- if fds:
- for fd in fds:
- ss = fd.split(':')
- if len(ss) == 2:
- tbr = int_or_none(ss[0])
- if tbr:
- f = {
- 'url': '%s%s/%s_src_%s_%d.mp4' % (
- azure_progressive_base, azure_locator, video_id, ss[1], tbr),
- 'format_id': '%s-http-%d' % (cdn, tbr),
- 'tbr': tbr,
- }
- width_height = ss[1].split('x')
- if len(width_height) == 2:
- f.update({
- 'width': int_or_none(width_height[0]),
- 'height': int_or_none(width_height[1]),
- })
- formats.append(f)
-
- return formats
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
- video_id = mobj.group('id')
-
- video = None
-
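-        # the arc endpoint may return a single video object or a list of candidates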
- def find_video(result):
- if isinstance(result, dict):
- return result
- elif isinstance(result, list):
- vid = int(video_id)
- for v in result:
- if try_get(v, lambda x: x['general']['ID'], int) == vid:
- return v
- return None
-
- response = self._download_json(
- 'https://arc.nexx.cloud/api/video/%s.json' % video_id,
- video_id, fatal=False)
- if response and isinstance(response, dict):
- result = response.get('result')
- if result:
- video = find_video(result)
-
- # not all videos work via arc, e.g. nexx:741:1269984
- if not video:
- # Reverse engineered from JS code (see getDeviceID function)
- device_id = '%d:%d:%d%d' % (
- random.randint(1, 4), int(time.time()),
-                random.randint(10000, 99999), random.randint(1, 9))
-
- result = self._call_api(domain_id, 'session/init', video_id, data={
- 'nxp_devh': device_id,
- 'nxp_userh': '',
- 'precid': '0',
- 'playlicense': '0',
- 'screenx': '1920',
- 'screeny': '1080',
- 'playerversion': '6.0.00',
- 'gateway': 'html5',
- 'adGateway': '',
- 'explicitlanguage': 'en-US',
- 'addTextTemplates': '1',
- 'addDomainData': '1',
- 'addAdModel': '1',
- }, headers={
- 'X-Request-Enable-Auth-Fallback': '1',
- })
-
- cid = result['general']['cid']
-
-        # As described in [1], the X-Request-Token generation algorithm is
- # as follows:
- # md5( operation + domain_id + domain_secret )
- # where domain_secret is a static value that will be given by nexx.tv
- # as per [1]. Here is how this "secret" is generated (reversed
- # from _play.api.init function, search for clienttoken). So it's
- # actually not static and not that much of a secret.
- # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
- secret = result['device']['clienttoken'][int(device_id[0]):]
- secret = secret[0:len(secret) - int(device_id[-1])]
-
- op = 'byid'
-
- # Reversed from JS code for _play.api.call function (search for
- # X-Request-Token)
- request_token = hashlib.md5(
- ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
-
- result = self._call_api(
- domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
- 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
- 'addInteractionOptions': '1',
- 'addStatusDetails': '1',
- 'addStreamDetails': '1',
- 'addCaptions': '1',
- 'addScenes': '1',
- 'addHotSpots': '1',
- 'addBumpers': '1',
- 'captionFormat': 'data',
- }, headers={
- 'X-Request-CID': cid,
- 'X-Request-Token': request_token,
- })
- video = find_video(result)
-
- general = video['general']
- title = general['title']
-
- cdn = video['streamdata']['cdnType']
-
- if cdn == 'azure':
- formats = self._extract_azure_formats(video, video_id)
- elif cdn == 'free':
- formats = self._extract_free_formats(video, video_id)
- else:
- # TODO: reverse more cdns
- assert False
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'alt_title': general.get('subtitle'),
- 'description': general.get('description'),
- 'release_year': int_or_none(general.get('year')),
- 'creator': general.get('studio') or general.get('studio_adref'),
- 'thumbnail': try_get(
- video, lambda x: x['imagedata']['thumb'], compat_str),
- 'duration': parse_duration(general.get('runtime')),
- 'timestamp': int_or_none(general.get('uploaded')),
- 'episode_number': int_or_none(try_get(
- video, lambda x: x['episodedata']['episode'])),
- 'season_number': int_or_none(try_get(
- video, lambda x: x['episodedata']['season'])),
- 'formats': formats,
- }
-
-
-class NexxEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
- 'md5': '16746bfc28c42049492385c989b26c4a',
- 'info_dict': {
- 'id': '161464',
- 'ext': 'mp4',
- 'title': 'Nervenkitzel Achterbahn',
- 'alt_title': 'Karussellbauer in Deutschland',
- 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
- 'release_year': 2005,
- 'creator': 'SPIEGEL TV',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 2761,
- 'timestamp': 1394021479,
- 'upload_date': '20140305',
- },
- 'params': {
- 'format': 'bestvideo',
- 'skip_download': True,
- },
- }
-
- @staticmethod
- def _extract_urls(webpage):
- # Reference:
- # 1. https://nx-s.akamaized.net/files/201510/44.pdf
-
- # iFrame Embed Integration
- return [mobj.group('url') for mobj in re.finditer(
- r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1',
- webpage)]
-
- def _real_extract(self, url):
- embed_id = self._match_id(url)
-
- webpage = self._download_webpage(url, embed_id)
-
- return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key())
diff --git a/youtube_dl/extractor/nhk.py b/youtube_dl/extractor/nhk.py
deleted file mode 100644
index 6a2c6cb7b..000000000
--- a/youtube_dl/extractor/nhk.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class NhkVodIE(InfoExtractor):
- _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[a-z]+-\d{8}-\d+)'
- # Content available only for a limited period of time. Visit
- # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
- _TESTS = [{
- # clip
- 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
- 'md5': '256a1be14f48d960a7e61e2532d95ec3',
- 'info_dict': {
- 'id': 'a95j5iza',
- 'ext': 'mp4',
- 'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
- 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
- 'timestamp': 1565965194,
- 'upload_date': '20190816',
- },
- }, {
- 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
- 'only_matching': True,
- }, {
- 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
- 'only_matching': True,
- }, {
- 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
- 'only_matching': True,
- }]
- _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7/episode/%s/%s/all%s.json'
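- # e.g. for the clip test above (episode_id '9999-011', lang 'en', video)
- # the template expands to:
- # https://api.nhk.or.jp/nhkworld/vodcliplist/v7/episode/9999-011/en/all/all.json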
-
- def _real_extract(self, url):
- lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
- if episode_id.isdigit():
- episode_id = episode_id[:4] + '-' + episode_id[4:]
-
- is_video = m_type == 'video'
- episode = self._download_json(
- self._API_URL_TEMPLATE % (
- 'v' if is_video else 'r',
- 'clip' if episode_id[:4] == '9999' else 'esd',
- episode_id, lang, '/all' if is_video else ''),
- episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
- title = episode.get('sub_title_clean') or episode['sub_title']
-
- def get_clean_field(key):
- return episode.get(key + '_clean') or episode.get(key)
-
- series = get_clean_field('title')
-
- thumbnails = []
- for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
- img_path = episode.get('image' + s)
- if not img_path:
- continue
- thumbnails.append({
- 'id': '%dp' % h,
- 'height': h,
- 'width': w,
- 'url': 'https://www3.nhk.or.jp' + img_path,
- })
-
- info = {
- 'id': episode_id + '-' + lang,
- 'title': '%s - %s' % (series, title) if series and title else title,
- 'description': get_clean_field('description'),
- 'thumbnails': thumbnails,
- 'series': series,
- 'episode': title,
- }
- if is_video:
- info.update({
- '_type': 'url_transparent',
- 'ie_key': 'Piksel',
- 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
- })
- else:
- audio = episode['audio']
- audio_path = audio['audio']
- info['formats'] = self._extract_m3u8_formats(
- 'https://nhks-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
- episode_id, 'm4a', m3u8_id='hls', fatal=False)
- for proto in ('rtmpt', 'rtmp'):
- info['formats'].append({
- 'ext': 'flv',
- 'format_id': proto,
- 'url': '%s://flv.nhk.or.jp/ondemand/mp4:flv%s' % (proto, audio_path),
- 'vcodec': 'none',
- })
- for f in info['formats']:
- f['language'] = lang
- return info
diff --git a/youtube_dl/extractor/nintendo.py b/youtube_dl/extractor/nintendo.py
deleted file mode 100644
index 4b4e66b05..000000000
--- a/youtube_dl/extractor/nintendo.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .ooyala import OoyalaIE
-from ..utils import unescapeHTML
-
-
-class NintendoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nintendo\.com/games/detail/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://www.nintendo.com/games/detail/yEiAzhU2eQI1KZ7wOHhngFoAHc1FpHwj',
- 'info_dict': {
- 'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW',
- 'ext': 'flv',
- 'title': 'Duck Hunt Wii U VC NES - Trailer',
- 'duration': 60.326,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Ooyala'],
- }, {
- 'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u',
- 'info_dict': {
- 'id': 'tokyo-mirage-sessions-fe-wii-u',
- 'title': 'Tokyo Mirage Sessions ♯FE',
- },
- 'playlist_count': 3,
- }]
-
- def _real_extract(self, url):
- page_id = self._match_id(url)
-
- webpage = self._download_webpage(url, page_id)
-
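- # Each trailer is embedded as e.g. (markup shape assumed; the video
- # code mirrors the test above):
- # <div class="embed-video" data-video-code="MzMmticjp0VPzO3CCj4rmFOuohEuEWoW">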
- entries = [
- OoyalaIE._build_url_result(m.group('code'))
- for m in re.finditer(
- r'class=(["\'])embed-video\1[^>]+data-video-code=(["\'])(?P<code>(?:(?!\2).)+)\2',
- webpage)]
-
- return self.playlist_result(
- entries, page_id, unescapeHTML(self._og_search_title(webpage, fatal=False)))
diff --git a/youtube_dl/extractor/nova.py b/youtube_dl/extractor/nova.py
deleted file mode 100644
index 901f44b54..000000000
--- a/youtube_dl/extractor/nova.py
+++ /dev/null
@@ -1,256 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- int_or_none,
- js_to_json,
- qualities,
- unified_strdate,
- url_or_none,
-)
-
-
-class NovaEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
- 'md5': 'b3834f6de5401baabf31ed57456463f7',
- 'info_dict': {
- 'id': '8o0n0r',
- 'ext': 'mp4',
- 'title': '2180. díl',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 2578,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- bitrates = self._parse_json(
- self._search_regex(
- r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
- video_id, transform_source=js_to_json)
-
- QUALITIES = ('lq', 'mq', 'hq', 'hd')
- quality_key = qualities(QUALITIES)
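- # qualities() maps each name to its index in QUALITIES, so 'hd'
- # ranks highest and 'lq' lowest when formats are sorted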
-
- formats = []
- for format_id, format_list in bitrates.items():
- if not isinstance(format_list, list):
- continue
- for format_url in format_list:
- format_url = url_or_none(format_url)
- if not format_url:
- continue
- f = {
- 'url': format_url,
- }
- f_id = format_id
- for quality in QUALITIES:
- if '%s.mp4' % quality in format_url:
- f_id += '-%s' % quality
- f.update({
- 'quality': quality_key(quality),
- 'format_note': quality.upper(),
- })
- break
- f['format_id'] = f_id
- formats.append(f)
- self._sort_formats(formats)
-
- title = self._og_search_title(
- webpage, default=None) or self._search_regex(
- (r'<value>(?P<title>[^<]+)',
- r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
- 'title', group='value')
- thumbnail = self._og_search_thumbnail(
- webpage, default=None) or self._search_regex(
- r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'thumbnail', fatal=False, group='value')
- duration = int_or_none(self._search_regex(
- r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- }
-
-
-class NovaIE(InfoExtractor):
- IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
- _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
- _TESTS = [{
- 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
- 'md5': '1dd7b9d5ea27bc361f110cd855a19bd3',
- 'info_dict': {
- 'id': '1757139',
- 'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
- 'ext': 'mp4',
- 'title': 'Podzemní nemocnice v pražské Krči',
- 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
- 'thumbnail': r're:^https?://.*\.(?:jpg)',
- }
- }, {
- 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
- 'info_dict': {
- 'id': '1753621',
- 'ext': 'mp4',
- 'title': 'Zaklínač 3: Divoký hon',
- 'description': 're:.*Pokud se stejně jako my nemůžete.*',
- 'thumbnail': r're:https?://.*\.jpg(\?.*)?',
- 'upload_date': '20150521',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- }
- }, {
- # media.cms.nova.cz embed
- 'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
- 'info_dict': {
- 'id': '8o0n0r',
- 'ext': 'mp4',
- 'title': '2180. díl',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 2578,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [NovaEmbedIE.ie_key()],
- }, {
- 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
- 'only_matching': True,
- }, {
- 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
- 'only_matching': True,
- }, {
- 'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html',
- 'only_matching': True,
- }, {
- 'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html',
- 'only_matching': True,
- }, {
- 'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = mobj.group('id')
- site = mobj.group('site')
-
- webpage = self._download_webpage(url, display_id)
-
- # novaplus
- embed_id = self._search_regex(
- r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
- webpage, 'embed url', default=None)
- if embed_id:
- return self.url_result(
- 'https://media.cms.nova.cz/embed/%s' % embed_id,
- ie=NovaEmbedIE.ie_key(), video_id=embed_id)
-
- video_id = self._search_regex(
- [r"(?:media|video_id)\s*:\s*'(\d+)'",
- r'media=(\d+)',
- r'id="article_video_(\d+)"',
- r'id="player_(\d+)"'],
- webpage, 'video id')
-
- config_url = self._search_regex(
- r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
- webpage, 'config url', default=None)
- config_params = {}
-
- if not config_url:
- player = self._parse_json(
- self._search_regex(
- r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
- 'player', default='{}'),
- video_id, transform_source=js_to_json, fatal=False)
- if player:
- config_url = url_or_none(player.get('configUrl'))
- params = player.get('configParams')
- if isinstance(params, dict):
- config_params = params
-
- if not config_url:
- DEFAULT_SITE_ID = '23000'
- SITES = {
- 'tvnoviny': DEFAULT_SITE_ID,
- 'novaplus': DEFAULT_SITE_ID,
- 'vymena': DEFAULT_SITE_ID,
- 'krasna': DEFAULT_SITE_ID,
- 'fanda': '30',
- 'tn': '30',
- 'doma': '30',
- }
-
- site_id = self._search_regex(
- r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
- site, DEFAULT_SITE_ID)
-
- config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
- config_params = {
- 'site': site_id,
- 'media': video_id,
- 'quality': 3,
- 'version': 1,
- }
-
- config = self._download_json(
- config_url, display_id,
- 'Downloading config JSON', query=config_params,
- transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
-
- mediafile = config['mediafile']
- video_url = mediafile['src']
-
- m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url)
- if m:
- formats = [{
- 'url': m.group('url'),
- 'app': m.group('app'),
- 'play_path': m.group('playpath'),
- 'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf',
- 'ext': 'flv',
- }]
- else:
- formats = [{
- 'url': video_url,
- }]
- self._sort_formats(formats)
-
- title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
- description = clean_html(self._og_search_description(webpage, default=None))
- thumbnail = config.get('poster')
-
- if site == 'novaplus':
- upload_date = unified_strdate(self._search_regex(
- r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
- elif site == 'fanda':
- upload_date = unified_strdate(self._search_regex(
- r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
- else:
- upload_date = None
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'upload_date': upload_date,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/nowness.py b/youtube_dl/extractor/nowness.py
deleted file mode 100644
index f26dafb8f..000000000
--- a/youtube_dl/extractor/nowness.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- sanitized_Request,
-)
-
-
-class NownessBaseIE(InfoExtractor):
- def _extract_url_result(self, post):
- if post['type'] == 'video':
- for media in post['media']:
- if media['type'] == 'video':
- video_id = media['content']
- source = media['source']
- if source == 'brightcove':
- player_code = self._download_webpage(
- 'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
- note='Downloading player JavaScript',
- errnote='Unable to download player JavaScript')
- bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
- if bc_url:
- return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
- bc_url = BrightcoveNewIE._extract_url(self, player_code)
- if bc_url:
- return self.url_result(bc_url, BrightcoveNewIE.ie_key())
- raise ExtractorError('Could not find player definition')
- elif source == 'vimeo':
- return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
- elif source == 'youtube':
- return self.url_result(video_id, 'Youtube')
- elif source == 'cinematique':
- # youtube-dl currently doesn't support cinematique
- # return self.url_result('http://cinematique.com/embed/%s' % video_id, 'Cinematique')
- pass
-
- def _api_request(self, url, request_path):
- display_id = self._match_id(url)
- request = sanitized_Request(
- 'http://api.nowness.com/api/' + request_path % display_id,
- headers={
- 'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',
- })
- return display_id, self._download_json(request, display_id)
-
-
-class NownessIE(NownessBaseIE):
- IE_NAME = 'nowness'
- _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/(?:story|(?:series|category)/[^/]+)/(?P<id>[^/]+?)(?:$|[?#])'
- _TESTS = [{
- 'url': 'https://www.nowness.com/story/candor-the-art-of-gesticulation',
- 'md5': '068bc0202558c2e391924cb8cc470676',
- 'info_dict': {
- 'id': '2520295746001',
- 'ext': 'mp4',
- 'title': 'Candor: The Art of Gesticulation',
- 'description': 'Candor: The Art of Gesticulation',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1446745676,
- 'upload_date': '20151105',
- 'uploader_id': '2385340575001',
- },
- 'add_ie': ['BrightcoveNew'],
- }, {
- 'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
- 'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
- 'info_dict': {
- 'id': '3716354522001',
- 'ext': 'mp4',
- 'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
- 'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1407315371,
- 'upload_date': '20140806',
- 'uploader_id': '2385340575001',
- },
- 'add_ie': ['BrightcoveNew'],
- }, {
- # vimeo
- 'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
- 'md5': '9a5a6a8edf806407e411296ab6bc2a49',
- 'info_dict': {
- 'id': '130020913',
- 'ext': 'mp4',
- 'title': 'Bleu, Blanc, Rouge - A Godard Supercut',
- 'description': 'md5:f0ea5f1857dffca02dbd37875d742cec',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'upload_date': '20150607',
- 'uploader': 'Cinema Sem Lei',
- 'uploader_id': 'cinemasemlei',
- },
- 'add_ie': ['Vimeo'],
- }]
-
- def _real_extract(self, url):
- _, post = self._api_request(url, 'post/getBySlug/%s')
- return self._extract_url_result(post)
-
-
-class NownessPlaylistIE(NownessBaseIE):
- IE_NAME = 'nowness:playlist'
- _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/playlist/(?P<id>\d+)'
- _TEST = {
- 'url': 'https://www.nowness.com/playlist/3286/i-guess-thats-why-they-call-it-the-blues',
- 'info_dict': {
- 'id': '3286',
- },
- 'playlist_mincount': 8,
- }
-
- def _real_extract(self, url):
- playlist_id, playlist = self._api_request(url, 'post?PlaylistId=%s')
- entries = [self._extract_url_result(item) for item in playlist['items']]
- return self.playlist_result(entries, playlist_id)
-
-
-class NownessSeriesIE(NownessBaseIE):
- IE_NAME = 'nowness:series'
- _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/series/(?P<id>[^/]+?)(?:$|[?#])'
- _TEST = {
- 'url': 'https://www.nowness.com/series/60-seconds',
- 'info_dict': {
- 'id': '60',
- 'title': '60 Seconds',
- 'description': 'One-minute wisdom in a new NOWNESS series',
- },
- 'playlist_mincount': 4,
- }
-
- def _real_extract(self, url):
- display_id, series = self._api_request(url, 'series/getBySlug/%s')
- entries = [self._extract_url_result(post) for post in series['posts']]
- series_title = None
- series_description = None
- translations = series.get('translations', [])
- if translations:
- series_title = translations[0].get('title') or translations[0]['seoTitle']
- series_description = translations[0].get('seoDescription')
- return self.playlist_result(
- entries, compat_str(series['id']), series_title, series_description)
diff --git a/youtube_dl/extractor/npr.py b/youtube_dl/extractor/npr.py
deleted file mode 100644
index a5e8baa7e..000000000
--- a/youtube_dl/extractor/npr.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- qualities,
-)
-
-
-class NprIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more',
- 'info_dict': {
- 'id': '449974205',
- 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
- },
- 'playlist_count': 7,
- }, {
- 'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz',
- 'info_dict': {
- 'id': '446928052',
- 'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
- },
- 'playlist': [{
- 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
- 'info_dict': {
- 'id': '446929930',
- 'ext': 'mp3',
- 'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)',
- 'duration': 402,
- },
- }],
- }, {
- # multimedia, no media title
- 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
- 'info_dict': {
- 'id': '533198237',
- 'title': 'Tigers Jaw: Tiny Desk Concert',
- },
- 'playlist': [{
- 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
- 'info_dict': {
- 'id': '533201718',
- 'ext': 'mp4',
- 'title': 'Tigers Jaw: Tiny Desk Concert',
- 'duration': 402,
- },
- }],
- 'expected_warnings': ['Failed to download m3u8 information'],
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- story = self._download_json(
- 'http://api.npr.org/query', playlist_id, query={
- 'id': playlist_id,
- 'fields': 'audio,multimedia,title',
- 'format': 'json',
- 'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
- })['list']['story'][0]
- playlist_title = story.get('title', {}).get('$text')
-
- KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
- quality = qualities(KNOWN_FORMATS)
-
- entries = []
- for media in story.get('audio', []) + story.get('multimedia', []):
- media_id = media['id']
-
- formats = []
- for format_id, formats_entry in media.get('format', {}).items():
- if not formats_entry:
- continue
- if isinstance(formats_entry, list):
- formats_entry = formats_entry[0]
- format_url = formats_entry.get('$text')
- if not format_url:
- continue
- if format_id in KNOWN_FORMATS:
- if format_id == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, media_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- elif format_id == 'smil':
- smil_formats = self._extract_smil_formats(
- format_url, media_id, transform_source=lambda s: s.replace(
- 'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'))
- self._check_formats(smil_formats, media_id)
- formats.extend(smil_formats)
- else:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'quality': quality(format_id),
- })
- self._sort_formats(formats)
-
- entries.append({
- 'id': media_id,
- 'title': media.get('title', {}).get('$text') or playlist_title,
- 'thumbnail': media.get('altImageUrl', {}).get('$text'),
- 'duration': int_or_none(media.get('duration', {}).get('$text')),
- 'formats': formats,
- })
-
- return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/nrk.py b/youtube_dl/extractor/nrk.py
deleted file mode 100644
index 60933f069..000000000
--- a/youtube_dl/extractor/nrk.py
+++ /dev/null
@@ -1,677 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse_unquote,
-)
-from ..utils import (
- ExtractorError,
- int_or_none,
- JSON_LD_RE,
- NO_DEFAULT,
- parse_age_limit,
- parse_duration,
- try_get,
-)
-
-
-class NRKBaseIE(InfoExtractor):
- _GEO_COUNTRIES = ['NO']
-
- _api_host = None
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
-
- for api_host in api_hosts:
- data = self._download_json(
- 'http://%s/mediaelement/%s' % (api_host, video_id),
- video_id, 'Downloading mediaelement JSON',
- fatal=api_host == api_hosts[-1])
- if not data:
- continue
- self._api_host = api_host
- break
-
- title = data.get('fullTitle') or data.get('mainTitle') or data['title']
- video_id = data.get('id') or video_id
-
- entries = []
-
- conviva = data.get('convivaStatistics') or {}
- live = (data.get('mediaElementType') == 'Live'
- or data.get('isLive') is True or conviva.get('isLive'))
-
- def make_title(t):
- return self._live_title(t) if live else t
-
- media_assets = data.get('mediaAssets')
- if media_assets and isinstance(media_assets, list):
- def video_id_and_title(idx):
- return ((video_id, title) if len(media_assets) == 1
- else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
- for num, asset in enumerate(media_assets, 1):
- asset_url = asset.get('url')
- if not asset_url:
- continue
- formats = self._extract_akamai_formats(asset_url, video_id)
- if not formats:
- continue
- self._sort_formats(formats)
-
- # Some f4m streams may not work with hdcore in fragments' URLs
- for f in formats:
- extra_param = f.get('extra_param_to_segment_url')
- if extra_param and 'hdcore' in extra_param:
- del f['extra_param_to_segment_url']
-
- entry_id, entry_title = video_id_and_title(num)
- duration = parse_duration(asset.get('duration'))
- subtitles = {}
- for subtitle in ('webVtt', 'timedText'):
- subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
- if subtitle_url:
- subtitles.setdefault('no', []).append({
- 'url': compat_urllib_parse_unquote(subtitle_url)
- })
- entries.append({
- 'id': asset.get('carrierId') or entry_id,
- 'title': make_title(entry_title),
- 'duration': duration,
- 'subtitles': subtitles,
- 'formats': formats,
- })
-
- if not entries:
- media_url = data.get('mediaUrl')
- if media_url:
- formats = self._extract_akamai_formats(media_url, video_id)
- self._sort_formats(formats)
- duration = parse_duration(data.get('duration'))
- entries = [{
- 'id': video_id,
- 'title': make_title(title),
- 'duration': duration,
- 'formats': formats,
- }]
-
- if not entries:
- MESSAGES = {
- 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
- 'ProgramRightsHasExpired': 'Programmet har gått ut',
- 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
- }
- message_type = data.get('messageType', '')
- # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
- if 'IsGeoBlocked' in message_type:
- self.raise_geo_restricted(
- msg=MESSAGES.get('ProgramIsGeoBlocked'),
- countries=self._GEO_COUNTRIES)
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, MESSAGES.get(
- message_type, message_type)),
- expected=True)
-
- series = conviva.get('seriesName') or data.get('seriesTitle')
- episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
-
- season_number = None
- episode_number = None
- if data.get('mediaElementType') == 'Episode':
- _season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
- data.get('relativeOriginUrl', '')
- EPISODENUM_RE = [
- r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
- r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
- ]
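- # e.g. a relativeOriginUrl like '/tv/serie/anno/sesong-3/episode-13'
- # (path shape assumed) yields season 3, episode 13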
- season_number = int_or_none(self._search_regex(
- EPISODENUM_RE, _season_episode, 'season number',
- default=None, group='season'))
- episode_number = int_or_none(self._search_regex(
- EPISODENUM_RE, _season_episode, 'episode number',
- default=None, group='episode'))
-
- thumbnails = None
- images = data.get('images')
- if images and isinstance(images, dict):
- web_images = images.get('webImages')
- if isinstance(web_images, list):
- thumbnails = [{
- 'url': image['imageUrl'],
- 'width': int_or_none(image.get('width')),
- 'height': int_or_none(image.get('height')),
- } for image in web_images if image.get('imageUrl')]
-
- description = data.get('description')
- category = data.get('mediaAnalytics', {}).get('category')
-
- common_info = {
- 'description': description,
- 'series': series,
- 'episode': episode,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'categories': [category] if category else None,
- 'age_limit': parse_age_limit(data.get('legalAge')),
- 'thumbnails': thumbnails,
- }
-
- vcodec = 'none' if data.get('mediaType') == 'Audio' else None
-
- for entry in entries:
- entry.update(common_info)
- for f in entry['formats']:
- f['vcodec'] = vcodec
-
- points = data.get('shortIndexPoints')
- if isinstance(points, list):
- chapters = []
- for next_num, point in enumerate(points, start=1):
- if not isinstance(point, dict):
- continue
- start_time = parse_duration(point.get('startPoint'))
- if start_time is None:
- continue
- end_time = parse_duration(
- data.get('duration')
- if next_num == len(points)
- else points[next_num].get('startPoint'))
- if end_time is None:
- continue
- chapters.append({
- 'start_time': start_time,
- 'end_time': end_time,
- 'title': point.get('title'),
- })
- if chapters and len(entries) == 1:
- entries[0]['chapters'] = chapters
-
- return self.playlist_result(entries, video_id, title, description)
-
-
-class NRKIE(NRKBaseIE):
- _VALID_URL = r'''(?x)
- (?:
- nrk:|
- https?://
- (?:
- (?:www\.)?nrk\.no/video/PS\*|
- v8[-.]psapi\.nrk\.no/mediaelement/
- )
- )
- (?P<id>[^?#&]+)
- '''
- _API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
- _TESTS = [{
- # video
- 'url': 'http://www.nrk.no/video/PS*150533',
- 'md5': '706f34cdf1322577589e369e522b50ef',
- 'info_dict': {
- 'id': '150533',
- 'ext': 'mp4',
- 'title': 'Dompap og andre fugler i Piip-Show',
- 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
- 'duration': 262,
- }
- }, {
- # audio
- 'url': 'http://www.nrk.no/video/PS*154915',
- # MD5 is unstable
- 'info_dict': {
- 'id': '154915',
- 'ext': 'flv',
- 'title': 'Slik høres internett ut når du er blind',
- 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
- 'duration': 20,
- }
- }, {
- 'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
- 'only_matching': True,
- }, {
- 'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
- 'only_matching': True,
- }, {
- 'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
- 'only_matching': True,
- }]
-
-
-class NRKTVIE(NRKBaseIE):
- IE_DESC = 'NRK TV and NRK Radio'
- _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
- _VALID_URL = r'''(?x)
- https?://
- (?:tv|radio)\.nrk(?:super)?\.no/
- (?:serie(?:/[^/]+){1,2}|program)/
- (?![Ee]pisodes)%s
- (?:/\d{2}-\d{2}-\d{4})?
- (?:\#del=(?P<part_id>\d+))?
- ''' % _EPISODE_RE
- _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
- _TESTS = [{
- 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
- 'md5': '9a167e54d04671eb6317a37b7bc8a280',
- 'info_dict': {
- 'id': 'MUHH48000314AA',
- 'ext': 'mp4',
- 'title': '20 spørsmål 23.05.2014',
- 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
- 'duration': 1741,
- 'series': '20 spørsmål',
- 'episode': '23.05.2014',
- },
- }, {
- 'url': 'https://tv.nrk.no/program/mdfp15000514',
- 'info_dict': {
- 'id': 'MDFP15000514CA',
- 'ext': 'mp4',
- 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
- 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
- 'duration': 4605,
- 'series': 'Kunnskapskanalen',
- 'episode': '24.05.2014',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # single playlist video
- 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
- 'info_dict': {
- 'id': 'MSPO40010515-part2',
- 'ext': 'flv',
- 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
- 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': ['Video is geo restricted'],
- 'skip': 'particular part is not supported currently',
- }, {
- 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
- 'playlist': [{
- 'info_dict': {
- 'id': 'MSPO40010515AH',
- 'ext': 'mp4',
- 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
- 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
- 'duration': 772,
- 'series': 'Tour de Ski',
- 'episode': '06.01.2015',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'info_dict': {
- 'id': 'MSPO40010515BH',
- 'ext': 'mp4',
- 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
- 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
- 'duration': 6175,
- 'series': 'Tour de Ski',
- 'episode': '06.01.2015',
- },
- 'params': {
- 'skip_download': True,
- },
- }],
- 'info_dict': {
- 'id': 'MSPO40010515',
- 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
- 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
- },
- 'expected_warnings': ['Video is geo restricted'],
- }, {
- 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
- 'info_dict': {
- 'id': 'KMTE50001317AA',
- 'ext': 'mp4',
- 'title': 'Anno 13:30',
- 'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
- 'duration': 2340,
- 'series': 'Anno',
- 'episode': '13:30',
- 'season_number': 3,
- 'episode_number': 13,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
- 'info_dict': {
- 'id': 'MUHH46000317AA',
- 'ext': 'mp4',
- 'title': 'Nytt på Nytt 27.01.2017',
- 'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
- 'duration': 1796,
- 'series': 'Nytt på nytt',
- 'episode': '27.01.2017',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
- 'only_matching': True,
- }, {
- 'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
- 'only_matching': True,
- }]
-
-
-class NRKTVEpisodeIE(InfoExtractor):
- _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
- _TEST = {
- 'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
- 'info_dict': {
- 'id': 'MSUI14000816AA',
- 'ext': 'mp4',
- 'title': 'Backstage 8:30',
- 'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
- 'duration': 1320,
- 'series': 'Backstage',
- 'season_number': 1,
- 'episode_number': 8,
- 'episode': '8:30',
- },
- 'params': {
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- nrk_id = self._parse_json(
- self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
- display_id)['@id']
-
- assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
- return self.url_result(
- 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
-
-
-class NRKTVSerieBaseIE(InfoExtractor):
- def _extract_series(self, webpage, display_id, fatal=True):
- config = self._parse_json(
- self._search_regex(
- (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
- r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
- webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
- display_id, fatal=False)
- if not config:
- return
- return try_get(
- config,
- (lambda x: x['initialState']['series'], lambda x: x['series']),
- dict)
-
- def _extract_seasons(self, seasons):
- if not isinstance(seasons, list):
- return []
- entries = []
- for season in seasons:
- entries.extend(self._extract_episodes(season))
- return entries
-
- def _extract_episodes(self, season):
- if not isinstance(season, dict):
- return []
- return self._extract_entries(season.get('episodes'))
-
- def _extract_entries(self, entry_list):
- if not isinstance(entry_list, list):
- return []
- entries = []
- for episode in entry_list:
- nrk_id = episode.get('prfId')
- if not nrk_id or not isinstance(nrk_id, compat_str):
- continue
- entries.append(self.url_result(
- 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
- return entries
-
-
-class NRKTVSeasonIE(NRKTVSerieBaseIE):
- _VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
- _TEST = {
- 'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
- 'info_dict': {
- 'id': '1',
- 'title': 'Sesong 1',
- },
- 'playlist_mincount': 30,
- }
-
- @classmethod
- def suitable(cls, url):
- return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
- else super(NRKTVSeasonIE, cls).suitable(url))
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- series = self._extract_series(webpage, display_id)
-
- season = next(
- s for s in series['seasons']
- if int(display_id) == s.get('seasonNumber'))
-
- title = try_get(season, lambda x: x['titles']['title'], compat_str)
- return self.playlist_result(
- self._extract_episodes(season), display_id, title)
-
-
-class NRKTVSeriesIE(NRKTVSerieBaseIE):
- _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
- _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
- _TESTS = [{
- # new layout, seasons
- 'url': 'https://tv.nrk.no/serie/backstage',
- 'info_dict': {
- 'id': 'backstage',
- 'title': 'Backstage',
- 'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
- },
- 'playlist_mincount': 60,
- }, {
- # new layout, instalments
- 'url': 'https://tv.nrk.no/serie/groenn-glede',
- 'info_dict': {
- 'id': 'groenn-glede',
- 'title': 'Grønn glede',
- 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
- },
- 'playlist_mincount': 10,
- }, {
- # old layout
- 'url': 'https://tv.nrksuper.no/serie/labyrint',
- 'info_dict': {
- 'id': 'labyrint',
- 'title': 'Labyrint',
- 'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
- },
- 'playlist_mincount': 3,
- }, {
- 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
- 'only_matching': True,
- }, {
- 'url': 'https://tv.nrk.no/serie/saving-the-human-race',
- 'only_matching': True,
- }, {
- 'url': 'https://tv.nrk.no/serie/postmann-pat',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (
- False if any(ie.suitable(url)
- for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
- else super(NRKTVSeriesIE, cls).suitable(url))
-
- def _real_extract(self, url):
- series_id = self._match_id(url)
-
- webpage = self._download_webpage(url, series_id)
-
- # New layout (e.g. https://tv.nrk.no/serie/backstage)
- series = self._extract_series(webpage, series_id, fatal=False)
- if series:
- title = try_get(series, lambda x: x['titles']['title'], compat_str)
- description = try_get(
- series, lambda x: x['titles']['subtitle'], compat_str)
- entries = []
- entries.extend(self._extract_seasons(series.get('seasons')))
- entries.extend(self._extract_entries(series.get('instalments')))
- entries.extend(self._extract_episodes(series.get('extraMaterial')))
- return self.playlist_result(entries, series_id, title, description)
-
- # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
- entries = [
- self.url_result(
- 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
- series=series_id, season=season_id))
- for season_id in re.findall(self._ITEM_RE, webpage)
- ]
-
- title = self._html_search_meta(
- 'seriestitle', webpage,
- 'title', default=None) or self._og_search_title(
- webpage, fatal=False)
- if title:
- title = self._search_regex(
- r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
-
- description = self._html_search_meta(
- 'series_description', webpage,
- 'description', default=None) or self._og_search_description(webpage)
-
- return self.playlist_result(entries, series_id, title, description)
-
-
-class NRKTVDirekteIE(NRKTVIE):
- IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
- _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'https://tv.nrk.no/direkte/nrk1',
- 'only_matching': True,
- }, {
- 'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
- 'only_matching': True,
- }]
-
-
-class NRKPlaylistBaseIE(InfoExtractor):
- def _extract_description(self, webpage):
- pass
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- entries = [
- self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
- for video_id in re.findall(self._ITEM_RE, webpage)
- ]
-
- playlist_title = self._extract_title(webpage)
- playlist_description = self._extract_description(webpage)
-
- return self.playlist_result(
- entries, playlist_id, playlist_title, playlist_description)
-
-
-class NRKPlaylistIE(NRKPlaylistBaseIE):
- _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
- _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
- _TESTS = [{
- 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
- 'info_dict': {
- 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
- 'title': 'Gjenopplev den historiske solformørkelsen',
- 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
- },
- 'playlist_count': 2,
- }, {
- 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
- 'info_dict': {
- 'id': 'rivertonprisen-til-karin-fossum-1.12266449',
- 'title': 'Rivertonprisen til Karin Fossum',
- 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
- },
- 'playlist_count': 2,
- }]
-
- def _extract_title(self, webpage):
- return self._og_search_title(webpage, fatal=False)
-
- def _extract_description(self, webpage):
- return self._og_search_description(webpage)
-
-
-class NRKTVEpisodesIE(NRKPlaylistBaseIE):
- _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
- _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
- _TESTS = [{
- 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
- 'info_dict': {
- 'id': '69031',
- 'title': 'Nytt på nytt, sesong: 201210',
- },
- 'playlist_count': 4,
- }]
-
- def _extract_title(self, webpage):
- return self._html_search_regex(
- r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
-
-
-class NRKSkoleIE(InfoExtractor):
- IE_DESC = 'NRK Skole'
- _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
- 'md5': '6bc936b01f9dd8ed45bc58b252b2d9b6',
- 'info_dict': {
- 'id': '6021',
- 'ext': 'mp4',
- 'title': 'Genetikk og eneggede tvillinger',
- 'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
- 'duration': 399,
- },
- }, {
- 'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id,
- video_id)
-
- nrk_id = self._parse_json(
- self._search_regex(
- r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>',
- webpage, 'application json'),
- video_id)['activeMedia']['psId']
-
- return self.url_result('nrk:%s' % nrk_id)
diff --git a/youtube_dl/extractor/nrl.py b/youtube_dl/extractor/nrl.py
deleted file mode 100644
index 798b91e04..000000000
--- a/youtube_dl/extractor/nrl.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-
-
-class NRLTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(/[^/]+)*/(?P<id>[^/?&#]+)'
- _TEST = {
- 'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/',
- 'info_dict': {
- 'id': 'YyNnFuaDE6kPJqlDhG4CGQ_w89mKTau4',
- 'ext': 'mp4',
- 'title': 'Match Highlights: Titans v Knights',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- 'format': 'bestvideo',
- },
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- q_data = self._parse_json(self._search_regex(
- r"(?s)q-data='({.+?})'", webpage, 'player data'), display_id)
- ooyala_id = q_data['videoId']
- return self.url_result(
- 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
diff --git a/youtube_dl/extractor/ntvru.py b/youtube_dl/extractor/ntvru.py
deleted file mode 100644
index 4f9cedb84..000000000
--- a/youtube_dl/extractor/ntvru.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- xpath_text,
- int_or_none,
-)
-
-
-class NTVRuIE(InfoExtractor):
- IE_NAME = 'ntv.ru'
- _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?:[^/]+/)*(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'http://www.ntv.ru/novosti/863142/',
- 'md5': 'ba7ea172a91cb83eb734cad18c10e723',
- 'info_dict': {
- 'id': '746000',
- 'ext': 'mp4',
- 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
- 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
- 'thumbnail': r're:^http://.*\.jpg',
- 'duration': 136,
- },
- }, {
- 'url': 'http://www.ntv.ru/video/novosti/750370/',
- 'md5': 'adecff79691b4d71e25220a191477124',
- 'info_dict': {
- 'id': '750370',
- 'ext': 'mp4',
- 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
- 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
- 'thumbnail': r're:^http://.*\.jpg',
- 'duration': 172,
- },
- }, {
- 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
- 'md5': '82dbd49b38e3af1d00df16acbeab260c',
- 'info_dict': {
- 'id': '747480',
- 'ext': 'mp4',
- 'title': '«Сегодня». 21 марта 2014 года. 16:00',
- 'description': '«Сегодня». 21 марта 2014 года. 16:00',
- 'thumbnail': r're:^http://.*\.jpg',
- 'duration': 1496,
- },
- }, {
- 'url': 'http://www.ntv.ru/kino/Koma_film',
- 'md5': 'f825770930937aa7e5aca0dc0d29319a',
- 'info_dict': {
- 'id': '1007609',
- 'ext': 'mp4',
- 'title': 'Остросюжетный фильм «Кома»',
- 'description': 'Остросюжетный фильм «Кома»',
- 'thumbnail': r're:^http://.*\.jpg',
- 'duration': 5592,
- },
- }, {
- 'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
- 'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
- 'info_dict': {
- 'id': '751482',
- 'ext': 'mp4',
- 'title': '«Дело врачей»: «Деревце жизни»',
- 'description': '«Дело врачей»: «Деревце жизни»',
- 'thumbnail': r're:^http://.*\.jpg',
- 'duration': 2590,
- },
- }]
-
- _VIDEO_ID_REGEXES = [
- r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
- r'<video embed=[^>]+><id>(\d+)</id>',
- r'<video restriction[^>]+><key>(\d+)</key>',
- ]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_url = self._og_search_property(
- ('video', 'video:iframe'), webpage, default=None)
- if video_url:
- video_id = self._search_regex(
- r'https?://(?:www\.)?ntv\.ru/video/(?:embed/)?(\d+)',
- video_url, 'video id', default=None)
-
- if not video_id:
- video_id = self._html_search_regex(
- self._VIDEO_ID_REGEXES, webpage, 'video id')
-
- player = self._download_xml(
- 'http://www.ntv.ru/vi%s/' % video_id,
- video_id, 'Downloading video XML')
-
- title = clean_html(xpath_text(player, './data/title', 'title', fatal=True))
- description = clean_html(xpath_text(player, './data/description', 'description'))
-
- video = player.find('./data/video')
- video_id = xpath_text(video, './id', 'video id')
- thumbnail = xpath_text(video, './splash', 'thumbnail')
- duration = int_or_none(xpath_text(video, './totaltime', 'duration'))
- view_count = int_or_none(xpath_text(video, './views', 'view count'))
-
- token = self._download_webpage(
- 'http://stat.ntv.ru/services/access/token',
- video_id, 'Downloading access token')
-
- formats = []
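- # the player XML carries plain <file>, 'hi' <hifile> and 'webm'
- # <webmfile> nodes, with matching <size>/<hisize>/<webmsize>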
- for format_id in ['', 'hi', 'webm']:
- file_ = video.find('./%sfile' % format_id)
- if file_ is None:
- continue
- size = video.find('./%ssize' % format_id)
- formats.append({
- 'url': 'http://media2.ntv.ru/vod/%s&tok=%s' % (file_.text, token),
- 'filesize': int_or_none(size.text if size is not None else None),
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'view_count': view_count,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/nytimes.py b/youtube_dl/extractor/nytimes.py
deleted file mode 100644
index 2bb77ab24..000000000
--- a/youtube_dl/extractor/nytimes.py
+++ /dev/null
@@ -1,223 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import hmac
-import hashlib
-import base64
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- float_or_none,
- int_or_none,
- js_to_json,
- mimetype2ext,
- parse_iso8601,
- remove_start,
-)
-
-
-class NYTimesBaseIE(InfoExtractor):
- _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
-
- def _extract_video_from_id(self, video_id):
- # The Authorization header generation algorithm is reverse engineered
- # from `signer` in http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
- path = '/svc/video/api/v3/video/' + video_id
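- # i.e. Authorization: NYTV base64(hexdigest(HMAC-SHA512(_SECRET, path + ':vhs')))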
- hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
- video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
- 'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
- 'X-NYTV': 'vhs',
- }, fatal=False)
- if not video_data:
- video_data = self._download_json(
- 'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
- video_id, 'Downloading video JSON')
-
- title = video_data['headline']
-
- def get_file_size(file_size):
- if isinstance(file_size, int):
- return file_size
- elif isinstance(file_size, dict):
- return int(file_size.get('value', 0))
- else:
- return None
-
- urls = []
- formats = []
- for video in video_data.get('renditions', []):
- video_url = video.get('url')
- format_id = video.get('type')
- if not video_url or format_id == 'thumbs' or video_url in urls:
- continue
- urls.append(video_url)
- ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id or 'hls', fatal=False))
- elif ext == 'mpd':
- continue
- # formats.extend(self._extract_mpd_formats(
- # video_url, video_id, format_id or 'dash', fatal=False))
- else:
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- 'vcodec': video.get('videoencoding') or video.get('video_codec'),
- 'width': int_or_none(video.get('width')),
- 'height': int_or_none(video.get('height')),
- 'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
- 'tbr': int_or_none(video.get('bitrate'), 1000),
- 'ext': ext,
- })
- self._sort_formats(formats)
-
- thumbnails = []
- for image in video_data.get('images', []):
- image_url = image.get('url')
- if not image_url:
- continue
- thumbnails.append({
- 'url': 'http://www.nytimes.com/' + image_url,
- 'width': int_or_none(image.get('width')),
- 'height': int_or_none(image.get('height')),
- })
-
- publication_date = video_data.get('publication_date')
- timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video_data.get('summary'),
- 'timestamp': timestamp,
- 'uploader': video_data.get('byline'),
- 'duration': float_or_none(video_data.get('duration'), 1000),
- 'formats': formats,
- 'thumbnails': thumbnails,
- }
-
-
-class NYTimesIE(NYTimesBaseIE):
- _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
- 'md5': 'd665342765db043f7e225cff19df0f2d',
- 'info_dict': {
- 'id': '100000002847155',
- 'ext': 'mov',
- 'title': 'Verbatim: What Is a Photocopier?',
- 'description': 'md5:93603dada88ddbda9395632fdc5da260',
- 'timestamp': 1398631707,
- 'upload_date': '20140427',
- 'uploader': 'Brett Weiner',
- 'duration': 419,
- }
- }, {
- 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- return self._extract_video_from_id(video_id)
-
-
-class NYTimesArticleIE(NYTimesBaseIE):
- _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
- _TESTS = [{
- 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
- 'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
- 'info_dict': {
- 'id': '100000003628438',
- 'ext': 'mov',
- 'title': 'New Minimum Wage: $70,000 a Year',
- 'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
- 'timestamp': 1429033037,
- 'upload_date': '20150414',
- 'uploader': 'Matthew Williams',
- }
- }, {
- 'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html',
- 'md5': 'e0d52040cafb07662acf3c9132db3575',
- 'info_dict': {
- 'id': '100000004709062',
- 'title': 'The Run-Up: ‘He Was Like an Octopus’',
- 'ext': 'mp3',
- 'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4',
- 'series': 'The Run-Up',
- 'episode': '‘He Was Like an Octopus’',
- 'episode_number': 20,
- 'duration': 2130,
- }
- }, {
- 'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html',
- 'info_dict': {
- 'id': '100000004709479',
- 'title': 'The Rise of Hitler',
- 'ext': 'mp3',
- 'description': 'md5:bce877fd9e3444990cb141875fab0028',
- 'creator': 'Pamela Paul',
- 'duration': 3475,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
- 'only_matching': True,
- }]
-
- def _extract_podcast_from_json(self, json, page_id, webpage):
- podcast_audio = self._parse_json(
- json, page_id, transform_source=js_to_json)
-
- audio_data = podcast_audio['data']
- track = audio_data['track']
-
- episode_title = track['title']
- video_url = track['source']
-
- description = track.get('description') or self._html_search_meta(
- ['og:description', 'twitter:description'], webpage)
-
- podcast_title = audio_data.get('podcast', {}).get('title')
- title = ('%s: %s' % (podcast_title, episode_title)
- if podcast_title else episode_title)
-
- episode = audio_data.get('podcast', {}).get('episode') or ''
- episode_number = int_or_none(self._search_regex(
- r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None))
-
- return {
- 'id': remove_start(podcast_audio.get('target'), 'FT') or page_id,
- 'url': video_url,
- 'title': title,
- 'description': description,
- 'creator': track.get('credit'),
- 'series': podcast_title,
- 'episode': episode_title,
- 'episode_number': episode_number,
- 'duration': int_or_none(track.get('duration')),
- }
-
- def _real_extract(self, url):
- page_id = self._match_id(url)
-
- webpage = self._download_webpage(url, page_id)
-
- video_id = self._search_regex(
- r'data-videoid=["\'](\d+)', webpage, 'video id',
- default=None, fatal=False)
- if video_id is not None:
- return self._extract_video_from_id(video_id)
-
- podcast_data = self._search_regex(
- (r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script',
- r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
- webpage, 'podcast data')
- return self._extract_podcast_from_json(podcast_data, page_id, webpage)
diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py
deleted file mode 100644
index 114b93c07..000000000
--- a/youtube_dl/extractor/odnoklassniki.py
+++ /dev/null
@@ -1,259 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_etree_fromstring,
- compat_parse_qs,
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse,
-)
-from ..utils import (
- ExtractorError,
- unified_strdate,
- int_or_none,
- qualities,
- unescapeHTML,
- urlencode_postdata,
-)
-
-
-class OdnoklassnikiIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:(?:www|m|mobile)\.)?
- (?:odnoklassniki|ok)\.ru/
- (?:
- video(?:embed)?/|
- web-api/video/moviePlayer/|
- live/|
- dk\?.*?st\.mvId=
- )
- (?P<id>[\d-]+)
- '''
- _TESTS = [{
- # metadata in JSON
- 'url': 'http://ok.ru/video/20079905452',
- 'md5': '0b62089b479e06681abaaca9d204f152',
- 'info_dict': {
- 'id': '20079905452',
- 'ext': 'mp4',
- 'title': 'Культура меняет нас (прекрасный ролик!))',
- 'duration': 100,
- 'upload_date': '20141207',
- 'uploader_id': '330537914540',
- 'uploader': 'Виталий Добровольский',
- 'like_count': int,
- 'age_limit': 0,
- },
- }, {
- # metadataUrl
- 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
- 'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
- 'info_dict': {
- 'id': '63567059965189-0',
- 'ext': 'mp4',
- 'title': 'Девушка без комплексов ...',
- 'duration': 191,
- 'upload_date': '20150518',
- 'uploader_id': '534380003155',
- 'uploader': '☭ Андрей Мещанинов ☭',
- 'like_count': int,
- 'age_limit': 0,
- 'start_time': 5,
- },
- }, {
- # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
- 'url': 'http://ok.ru/video/64211978996595-1',
- 'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
- 'info_dict': {
- 'id': 'V_VztHT5BzY',
- 'ext': 'mp4',
- 'title': 'Космическая среда от 26 августа 2015',
- 'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
- 'duration': 440,
- 'upload_date': '20150826',
- 'uploader_id': 'tvroscosmos',
- 'uploader': 'Телестудия Роскосмоса',
- 'age_limit': 0,
- },
- }, {
- # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
- 'url': 'http://ok.ru/video/62036049272859-0',
- 'info_dict': {
- 'id': '62036049272859-0',
- 'ext': 'mp4',
- 'title': 'МУЗЫКА ДОЖДЯ .',
- 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
- 'upload_date': '20120106',
- 'uploader_id': '473534735899',
- 'uploader': 'МARINA D',
- 'age_limit': 0,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Video has not been found',
- }, {
- 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ok.ru/video/20648036891',
- 'only_matching': True,
- }, {
- 'url': 'http://www.ok.ru/videoembed/20648036891',
- 'only_matching': True,
- }, {
- 'url': 'http://m.ok.ru/video/20079905452',
- 'only_matching': True,
- }, {
- 'url': 'http://mobile.ok.ru/video/20079905452',
- 'only_matching': True,
- }, {
- 'url': 'https://www.ok.ru/live/484531969818',
- 'only_matching': True,
- }, {
- 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
- 'only_matching': True,
- }, {
- # Paid video
- 'url': 'https://ok.ru/video/954886983203',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- start_time = int_or_none(compat_parse_qs(
- compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
-
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'http://ok.ru/video/%s' % video_id, video_id)
-
- error = self._search_regex(
- r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
- webpage, 'error', default=None)
- if error:
- raise ExtractorError(error, expected=True)
-
- player = self._parse_json(
- unescapeHTML(self._search_regex(
- r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
- webpage, 'player', group='player')),
- video_id)
-
- flashvars = player['flashvars']
-
- metadata = flashvars.get('metadata')
- if metadata:
- metadata = self._parse_json(metadata, video_id)
- else:
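- # metadataUrl is percent-encoded in flashvars and expects a POST;
- # when present, st.location must be echoed back in the form data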
- data = {}
- st_location = flashvars.get('location')
- if st_location:
- data['st.location'] = st_location
- metadata = self._download_json(
- compat_urllib_parse_unquote(flashvars['metadataUrl']),
- video_id, 'Downloading metadata JSON',
- data=urlencode_postdata(data))
-
- movie = metadata['movie']
-
- # Some embedded videos may not contain a title in the movie dict (e.g.
- # http://ok.ru/video/62036049272859-0), so we allow a missing title here;
- # it will be extracted later by the extractor that processes the actual
- # embed.
- provider = metadata.get('provider')
- title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
-
- thumbnail = movie.get('poster')
- duration = int_or_none(movie.get('duration'))
-
- author = metadata.get('author', {})
- uploader_id = author.get('id')
- uploader = author.get('name')
-
- upload_date = unified_strdate(self._html_search_meta(
- 'ya:ovs:upload_date', webpage, 'upload date', default=None))
-
- age_limit = None
- adult = self._html_search_meta(
- 'ya:ovs:adult', webpage, 'age limit', default=None)
- if adult:
- age_limit = 18 if adult == 'true' else 0
-
- like_count = int_or_none(metadata.get('likeCount'))
-
- info = {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'upload_date': upload_date,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'like_count': like_count,
- 'age_limit': age_limit,
- 'start_time': start_time,
- }
-
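- # For YouTube embeds the movie's contentId is the YouTube video ID;
- # 'url_transparent' hands extraction over to the YouTube extractor while
- # keeping the metadata collected above.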
- if provider == 'USER_YOUTUBE':
- info.update({
- '_type': 'url_transparent',
- 'url': movie['contentId'],
- })
- return info
-
- assert title
- if provider == 'LIVE_TV_APP':
- info['title'] = self._live_title(title)
-
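- # qualities() returns a ranking callable; later entries rank higher, so
- # quality('4') == 0 (worst), quality('5') == 5 (best), unknown ids get -1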
- quality = qualities(('4', '0', '1', '2', '3', '5'))
-
- formats = [{
- 'url': f['url'],
- 'ext': 'mp4',
- 'format_id': f['name'],
- } for f in metadata['videos']]
-
- m3u8_url = metadata.get('hlsManifestUrl')
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
-
- dash_manifest = metadata.get('metadataEmbedded')
- if dash_manifest:
- formats.extend(self._parse_mpd_formats(
- compat_etree_fromstring(dash_manifest), 'mpd'))
-
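- # format URLs carry a type marker like 'type/2' or 'type=2'; map it onto
- # the quality ranking above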
- for fmt in formats:
- fmt_type = self._search_regex(
- r'\btype[/=](\d)', fmt['url'],
- 'format type', default=None)
- if fmt_type:
- fmt['quality'] = quality(fmt_type)
-
- # Live formats
- m3u8_url = metadata.get('hlsMasterPlaylistUrl')
- if m3u8_url:
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8',
- m3u8_id='hls', fatal=False))
- rtmp_url = metadata.get('rtmpUrl')
- if rtmp_url:
- formats.append({
- 'url': rtmp_url,
- 'format_id': 'rtmp',
- 'ext': 'flv',
- })
-
- if not formats:
- payment_info = metadata.get('paymentInfo')
- if payment_info:
- raise ExtractorError('This video is paid content; subscribe to download it', expected=True)
-
- self._sort_formats(formats)
-
- info['formats'] = formats
- return info
diff --git a/youtube_dl/extractor/onet.py b/youtube_dl/extractor/onet.py
deleted file mode 100644
index 58da1bc27..000000000
--- a/youtube_dl/extractor/onet.py
+++ /dev/null
@@ -1,250 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- ExtractorError,
- float_or_none,
- get_element_by_class,
- int_or_none,
- js_to_json,
- NO_DEFAULT,
- parse_iso8601,
- remove_start,
- strip_or_none,
- url_basename,
-)
-
-
-class OnetBaseIE(InfoExtractor):
- def _search_mvp_id(self, webpage):
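- # matches e.g. id="mvp:381027.1509591944" and returns '381027.1509591944'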
- return self._search_regex(
- r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
-
- def _extract_from_id(self, video_id, webpage=None):
- response = self._download_json(
- 'http://qi.ckm.onetapi.pl/', video_id,
- query={
- 'body[id]': video_id,
- 'body[jsonrpc]': '2.0',
- 'body[method]': 'get_asset_detail',
- 'body[params][ID_Publikacji]': video_id,
- 'body[params][Service]': 'www.onet.pl',
- 'content-type': 'application/jsonp',
- 'x-onet-app': 'player.front.onetapi.pl',
- })
-
- error = response.get('error')
- if error:
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, error['message']), expected=True)
-
- video = response['result'].get('0')
-
- formats = []
- for _, formats_dict in video['formats'].items():
- if not isinstance(formats_dict, dict):
- continue
- for format_id, format_list in formats_dict.items():
- if not isinstance(format_list, list):
- continue
- for f in format_list:
- video_url = f.get('url')
- if not video_url:
- continue
- ext = determine_ext(video_url)
- if format_id == 'ism':
- formats.extend(self._extract_ism_formats(
- video_url, video_id, 'mss', fatal=False))
- elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- video_url, video_id, mpd_id='dash', fatal=False))
- else:
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- 'height': int_or_none(f.get('vertical_resolution')),
- 'width': int_or_none(f.get('horizontal_resolution')),
- 'abr': float_or_none(f.get('audio_bitrate')),
- 'vbr': float_or_none(f.get('video_bitrate')),
- })
- self._sort_formats(formats)
-
- meta = video.get('meta', {})
-
- title = (self._og_search_title(
- webpage, default=None) if webpage else None) or meta['title']
- description = (self._og_search_description(
- webpage, default=None) if webpage else None) or meta.get('description')
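- # some API responses misspell 'length' as 'lenght', so check both spellings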
- duration = meta.get('length') or meta.get('lenght')
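- # addDate separates date and time with a space (hence the ' ' delimiter),
- # presumably of the form 'YYYY-MM-DD HH:MM:SS'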
- timestamp = parse_iso8601(meta.get('addDate'), ' ')
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'formats': formats,
- }
-
-
-class OnetMVPIE(OnetBaseIE):
- _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'
-
- _TEST = {
- 'url': 'onetmvp:381027.1509591944',
- 'only_matching': True,
- }
-
- def _real_extract(self, url):
- return self._extract_from_id(self._match_id(url))
-
-
-class OnetIE(OnetBaseIE):
- _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
- IE_NAME = 'onet.tv'
-
- _TEST = {
- 'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
- 'md5': 'e3ffbf47590032ac3f27249204173d50',
- 'info_dict': {
- 'id': 'qbpyqc',
- 'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
- 'ext': 'mp4',
- 'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
- 'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
- 'upload_date': '20160705',
- 'timestamp': 1467721580,
- },
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id, video_id = mobj.group('display_id', 'id')
-
- webpage = self._download_webpage(url, display_id)
-
- mvp_id = self._search_mvp_id(webpage)
-
- info_dict = self._extract_from_id(mvp_id, webpage)
- info_dict.update({
- 'id': video_id,
- 'display_id': display_id,
- })
-
- return info_dict
-
-
-class OnetChannelIE(OnetBaseIE):
- _VALID_URL = r'https?://(?:www\.)?onet\.tv/[a-z]/(?P<id>[a-z]+)(?:[?#]|$)'
- IE_NAME = 'onet.tv:channel'
-
- _TEST = {
- 'url': 'http://onet.tv/k/openerfestival',
- 'info_dict': {
- 'id': 'openerfestival',
- 'title': 'Open\'er Festival Live',
- 'description': 'Dziękujemy, że oglądaliście transmisje. Zobaczcie nasze relacje i wywiady z artystami.',
- },
- 'playlist_mincount': 46,
- }
-
- def _real_extract(self, url):
- channel_id = self._match_id(url)
-
- webpage = self._download_webpage(url, channel_id)
-
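- # currentClip is a JS object literal that may be split with string
- # concatenation ('foo' + 'bar'); the re.sub collapses that before js_to_json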
- current_clip_info = self._parse_json(self._search_regex(
- r'var\s+currentClip\s*=\s*({[^}]+})', webpage, 'video info'), channel_id,
- transform_source=lambda s: js_to_json(re.sub(r'\'\s*\+\s*\'', '', s)))
- video_id = remove_start(current_clip_info['ckmId'], 'mvp:')
- video_name = url_basename(current_clip_info['url'])
-
- if self._downloader.params.get('noplaylist'):
- self.to_screen(
- 'Downloading just video %s because of --no-playlist' % video_name)
- return self._extract_from_id(video_id, webpage)
-
- self.to_screen(
- 'Downloading channel %s - add --no-playlist to just download video %s' % (
- channel_id, video_name))
- matches = re.findall(
- r'<a[^>]+href=[\'"](https?://(?:www\.)?onet\.tv/[a-z]/[a-z]+/[0-9a-z-]+/[0-9a-z]+)',
- webpage)
- entries = [
- self.url_result(video_link, OnetIE.ie_key())
- for video_link in matches]
-
- channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
- channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
- return self.playlist_result(entries, channel_id, channel_title, channel_description)
-
-
-class OnetPlIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
- IE_NAME = 'onet.pl'
-
- _TESTS = [{
- 'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
- 'md5': 'b94021eb56214c3969380388b6e73cb0',
- 'info_dict': {
- 'id': '1561707.1685479',
- 'ext': 'mp4',
- 'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
- 'description': 'md5:61fb0740084d2d702ea96512a03585b4',
- 'upload_date': '20170214',
- 'timestamp': 1487078046,
- },
- }, {
- # embedded via pulsembed
- 'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
- 'info_dict': {
- 'id': '501235.965429946',
- 'ext': 'mp4',
- 'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
- 'upload_date': '20170622',
- 'timestamp': 1498159955,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
- 'only_matching': True,
- }, {
- 'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
- 'only_matching': True,
- }, {
- 'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
- 'only_matching': True,
- }, {
- 'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
- 'only_matching': True,
- }]
-
- def _search_mvp_id(self, webpage, default=NO_DEFAULT):
- return self._search_regex(
- r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
- default=default)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- mvp_id = self._search_mvp_id(webpage, default=None)
-
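- # Some pages embed the player via pulsembed.eu instead of exposing the MVP
- # id directly; in that case fetch the embed page and retry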
- if not mvp_id:
- pulsembed_url = self._search_regex(
- r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
- webpage, 'pulsembed url', group='url')
- webpage = self._download_webpage(
- pulsembed_url, video_id, 'Downloading pulsembed webpage')
- mvp_id = self._search_mvp_id(webpage)
-
- return self.url_result(
- 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
diff --git a/youtube_dl/extractor/onionstudios.py b/youtube_dl/extractor/onionstudios.py
deleted file mode 100644
index c6e3d5640..000000000
--- a/youtube_dl/extractor/onionstudios.py
+++ /dev/null
@@ -1,81 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- float_or_none,
- mimetype2ext,
-)
-
-
-class OnionStudiosIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
-
- _TESTS = [{
- 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
- 'md5': '719d1f8c32094b8c33902c17bcae5e34',
- 'info_dict': {
- 'id': '2937',
- 'ext': 'mp4',
- 'title': 'Hannibal charges forward, stops for a cocktail',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'The A.V. Club',
- 'uploader_id': 'the-av-club',
- },
- }, {
- 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
- 'only_matching': True,
- }, {
- 'url': 'http://www.onionstudios.com/video/6139.json',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
- if mobj:
- return mobj.group('url')
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video_data = self._download_json(
- 'http://www.onionstudios.com/video/%s.json' % video_id, video_id)
-
- title = video_data['title']
-
- formats = []
- for source in video_data.get('sources', []):
- source_url = source.get('url')
- if not source_url:
- continue
- ext = mimetype2ext(source.get('content_type')) or determine_ext(source_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- source_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- else:
- tbr = int_or_none(source.get('bitrate'))
- formats.append({
- 'format_id': ext + ('-%d' % tbr if tbr else ''),
- 'url': source_url,
- 'width': int_or_none(source.get('width')),
- 'tbr': tbr,
- 'ext': ext,
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': video_data.get('poster_url'),
- 'uploader': video_data.get('channel_name'),
- 'uploader_id': video_data.get('channel_slug'),
- # 1000 is the scale argument (the duration is apparently reported in
- # milliseconds), not a dict-lookup default
- 'duration': float_or_none(video_data.get('duration'), 1000),
- 'tags': video_data.get('tags'),
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py
deleted file mode 100644
index 995b24d1b..000000000
--- a/youtube_dl/extractor/ooyala.py
+++ /dev/null
@@ -1,207 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_str,
- compat_urllib_parse_urlencode,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
- float_or_none,
- int_or_none,
- try_get,
- unsmuggle_url,
-)
-
-
-class OoyalaBaseIE(InfoExtractor):
- _PLAYER_BASE = 'http://player.ooyala.com/'
- _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
- _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s?'
-
- def _extract(self, content_tree_url, video_id, domain='example.org', supportedformats=None, embed_token=None):
- content_tree = self._download_json(content_tree_url, video_id)['content_tree']
- metadata = content_tree[list(content_tree)[0]]
- embed_code = metadata['embed_code']
- pcode = metadata.get('asset_pcode') or embed_code
- title = metadata['title']
-
- auth_data = self._download_json(
- self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code)
- + compat_urllib_parse_urlencode({
- 'domain': domain,
- 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
- 'embedToken': embed_token,
- }), video_id, headers=self.geo_verification_headers())
-
- cur_auth_data = auth_data['authorization_data'][embed_code]
-
- urls = []
- formats = []
- if cur_auth_data['authorized']:
- for stream in cur_auth_data['streams']:
- url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
- if not url_data:
- continue
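- # the stream URL is base64-encoded in stream['url']['data']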
- s_url = compat_b64decode(url_data).decode('utf-8')
- if not s_url or s_url in urls:
- continue
- urls.append(s_url)
- ext = determine_ext(s_url, None)
- delivery_type = stream.get('delivery_type')
- if delivery_type == 'hls' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- elif delivery_type == 'hds' or ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
- elif delivery_type == 'dash' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- s_url, embed_code, mpd_id='dash', fatal=False))
- elif delivery_type == 'smooth':
- formats.extend(self._extract_ism_formats(
- s_url, embed_code, ism_id='mss', fatal=False))
- elif ext == 'smil':
- formats.extend(self._extract_smil_formats(
- s_url, embed_code, fatal=False))
- else:
- formats.append({
- 'url': s_url,
- 'ext': ext or delivery_type,
- 'vcodec': stream.get('video_codec'),
- 'format_id': delivery_type,
- 'width': int_or_none(stream.get('width')),
- 'height': int_or_none(stream.get('height')),
- 'abr': int_or_none(stream.get('audio_bitrate')),
- 'vbr': int_or_none(stream.get('video_bitrate')),
- 'fps': float_or_none(stream.get('framerate')),
- })
- else:
- raise ExtractorError('%s said: %s' % (
- self.IE_NAME, cur_auth_data['message']), expected=True)
- self._sort_formats(formats)
-
- subtitles = {}
- for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
- sub_url = sub.get('url')
- if not sub_url:
- continue
- subtitles[lang] = [{
- 'url': sub_url,
- }]
-
- return {
- 'id': embed_code,
- 'title': title,
- 'description': metadata.get('description'),
- 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
- 'duration': float_or_none(metadata.get('duration'), 1000),
- 'subtitles': subtitles,
- 'formats': formats,
- }
-
-
-class OoyalaIE(OoyalaBaseIE):
- _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
-
- _TESTS = [
- {
- # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
- 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
- 'info_dict': {
- 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
- 'ext': 'mp4',
- 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
- 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
- 'duration': 853.386,
- },
- # The video in the original webpage now uses PlayWire
- 'skip': 'Ooyala said: movie expired',
- }, {
- # Only available for iPad
- 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
- 'info_dict': {
- 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
- 'ext': 'mp4',
- 'title': 'Simulation Overview - Levels of Simulation',
- 'duration': 194.948,
- },
- },
- {
- # Information available only through the SAS API
- # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
- 'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
- 'md5': 'a84001441b35ea492bc03736e59e7935',
- 'info_dict': {
- 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
- 'ext': 'mp4',
- 'title': 'Divide Tool Path.mp4',
- 'duration': 204.405,
- }
- },
- {
- # empty stream['url']['data']
- 'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is',
- 'only_matching': True,
- }
- ]
-
- @staticmethod
- def _url_for_embed_code(embed_code):
- return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
-
- @classmethod
- def _build_url_result(cls, embed_code):
- return cls.url_result(cls._url_for_embed_code(embed_code),
- ie=cls.ie_key())
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- embed_code = self._match_id(url)
- domain = smuggled_data.get('domain')
- supportedformats = smuggled_data.get('supportedformats')
- embed_token = smuggled_data.get('embed_token')
- content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
- return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token)
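- # Callers may smuggle extra context into the URL, e.g. (illustrative):
- # smuggle_url('ooyala:' + embed_code, {'domain': 'example.com'})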
-
-
-class OoyalaExternalIE(OoyalaBaseIE):
- _VALID_URL = r'''(?x)
- (?:
- ooyalaexternal:|
- https?://.+?\.ooyala\.com/.*?\bexternalId=
- )
- (?P<partner_id>[^:]+)
- :
- (?P<id>.+)
- (?:
- :|
- .*?&pcode=
- )
- (?P<pcode>.+?)
- (?:&|$)
- '''
-
- _TEST = {
- 'url': 'https://player.ooyala.com/player.js?externalId=espn:10365079&pcode=1kNG061cgaoolOncv54OAO1ceO-I&adSetCode=91cDU6NuXTGKz3OdjOxFdAgJVtQcKJnI&callback=handleEvents&hasModuleParams=1&height=968&playerBrandingId=7af3bd04449c444c964f347f11873075&targetReplaceId=videoPlayer&width=1656&wmode=opaque&allowScriptAccess=always',
- 'info_dict': {
- 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
- 'ext': 'mp4',
- 'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
- 'duration': 1302.0,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
- content_tree_url = self._CONTENT_TREE_BASE + 'external_id/%s/%s:%s' % (pcode, partner_id, video_id)
- return self._extract(content_tree_url, video_id)
diff --git a/youtube_dl/extractor/openload.py b/youtube_dl/extractor/openload.py
deleted file mode 100644
index 66e38cdb4..000000000
--- a/youtube_dl/extractor/openload.py
+++ /dev/null
@@ -1,501 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import os
-import re
-import subprocess
-import tempfile
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
- compat_kwargs,
-)
-from ..utils import (
- check_executable,
- determine_ext,
- encodeArgument,
- ExtractorError,
- get_element_by_id,
- get_exe_version,
- is_outdated_version,
- std_headers,
-)
-
-
-def cookie_to_dict(cookie):
- cookie_dict = {
- 'name': cookie.name,
- 'value': cookie.value,
- }
- if cookie.port_specified:
- cookie_dict['port'] = cookie.port
- if cookie.domain_specified:
- cookie_dict['domain'] = cookie.domain
- if cookie.path_specified:
- cookie_dict['path'] = cookie.path
- if cookie.expires is not None:
- cookie_dict['expires'] = cookie.expires
- if cookie.secure is not None:
- cookie_dict['secure'] = cookie.secure
- if cookie.discard is not None:
- cookie_dict['discard'] = cookie.discard
- try:
- if (cookie.has_nonstandard_attr('httpOnly')
- or cookie.has_nonstandard_attr('httponly')
- or cookie.has_nonstandard_attr('HttpOnly')):
- cookie_dict['httponly'] = True
- except TypeError:
- pass
- return cookie_dict
-
-
-def cookie_jar_to_list(cookie_jar):
- return [cookie_to_dict(cookie) for cookie in cookie_jar]
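- # e.g. [{'name': 'sid', 'value': 'abc123', 'domain': '.example.com',
- # 'path': '/', 'expires': 1234567890}, ...] (illustrative values)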
-
-
-class PhantomJSwrapper(object):
- """PhantomJS wrapper class
-
- This class is experimental.
- """
-
- _TEMPLATE = r'''
- phantom.onError = function(msg, trace) {{
- var msgStack = ['PHANTOM ERROR: ' + msg];
- if(trace && trace.length) {{
- msgStack.push('TRACE:');
- trace.forEach(function(t) {{
- msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
- + (t.function ? ' (in function ' + t.function +')' : ''));
- }});
- }}
- console.error(msgStack.join('\n'));
- phantom.exit(1);
- }};
- var page = require('webpage').create();
- var fs = require('fs');
- var read = {{ mode: 'r', charset: 'utf-8' }};
- var write = {{ mode: 'w', charset: 'utf-8' }};
- JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
- phantom.addCookie(x);
- }});
- page.settings.resourceTimeout = {timeout};
- page.settings.userAgent = "{ua}";
- page.onLoadStarted = function() {{
- page.evaluate(function() {{
- delete window._phantom;
- delete window.callPhantom;
- }});
- }};
- var saveAndExit = function() {{
- fs.write("{html}", page.content, write);
- fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
- phantom.exit();
- }};
- page.onLoadFinished = function(status) {{
- if(page.url === "") {{
- page.setContent(fs.read("{html}", read), "{url}");
- }}
- else {{
- {jscode}
- }}
- }};
- page.open("");
- '''
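- # The template restores saved cookies, opens a blank page, injects the
- # saved HTML, runs {jscode}, and saveAndExit() then writes the final HTML
- # and cookies back to the temp files.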
-
- _TMP_FILE_NAMES = ['script', 'html', 'cookies']
-
- @staticmethod
- def _version():
- return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
-
- def __init__(self, extractor, required_version=None, timeout=10000):
- self._TMP_FILES = {}
-
- self.exe = check_executable('phantomjs', ['-v'])
- if not self.exe:
- raise ExtractorError('PhantomJS executable not found in PATH, '
- 'download it from http://phantomjs.org',
- expected=True)
-
- self.extractor = extractor
-
- if required_version:
- version = self._version()
- if is_outdated_version(version, required_version):
- self.extractor._downloader.report_warning(
- 'Your copy of PhantomJS is outdated, update it to version '
- '%s or newer if you encounter any errors.' % required_version)
-
- self.options = {
- 'timeout': timeout,
- }
- for name in self._TMP_FILE_NAMES:
- tmp = tempfile.NamedTemporaryFile(delete=False)
- tmp.close()
- self._TMP_FILES[name] = tmp
-
- def __del__(self):
- for name in self._TMP_FILE_NAMES:
- try:
- os.remove(self._TMP_FILES[name].name)
- except (IOError, OSError, KeyError):
- pass
-
- def _save_cookies(self, url):
- cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
- for cookie in cookies:
- if 'path' not in cookie:
- cookie['path'] = '/'
- if 'domain' not in cookie:
- cookie['domain'] = compat_urlparse.urlparse(url).netloc
- with open(self._TMP_FILES['cookies'].name, 'wb') as f:
- f.write(json.dumps(cookies).encode('utf-8'))
-
- def _load_cookies(self):
- with open(self._TMP_FILES['cookies'].name, 'rb') as f:
- cookies = json.loads(f.read().decode('utf-8'))
- for cookie in cookies:
- if cookie['httponly'] is True:
- cookie['rest'] = {'httpOnly': None}
- if 'expiry' in cookie:
- cookie['expire_time'] = cookie['expiry']
- self.extractor._set_cookie(**compat_kwargs(cookie))
-
- def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
- """
- Downloads webpage (if needed) and executes JS
-
- Params:
- url: website url
- html: optional, html code of website
- video_id: video id
- note: optional, displayed when downloading webpage
- note2: optional, displayed when executing JS
- headers: custom http headers
- jscode: code to be executed when page is loaded
-
- Returns a tuple with:
- * the downloaded website (after JS execution)
- * anything you print with `console.log` (but not inside `page.evaluate`!)
-
- In most cases you don't need to add any `jscode`.
- It is executed in `page.onLoadFinished`.
- `saveAndExit();` is mandatory; use it instead of `phantom.exit()`.
- It is possible to wait for some element on the webpage, for example:
- var check = function() {
- var elementFound = page.evaluate(function() {
- return document.querySelector('#b.done') !== null;
- });
- if(elementFound)
- saveAndExit();
- else
- window.setTimeout(check, 500);
- }
-
- page.evaluate(function(){
- document.querySelector('#a').click();
- });
- check();
- """
- if 'saveAndExit();' not in jscode:
- raise ExtractorError('`saveAndExit();` not found in `jscode`')
- if not html:
- html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
- with open(self._TMP_FILES['html'].name, 'wb') as f:
- f.write(html.encode('utf-8'))
-
- self._save_cookies(url)
-
- replaces = self.options
- replaces['url'] = url
- user_agent = headers.get('User-Agent') or std_headers['User-Agent']
- replaces['ua'] = user_agent.replace('"', '\\"')
- replaces['jscode'] = jscode
-
- for x in self._TMP_FILE_NAMES:
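- # escape backslashes and double quotes so the temp file paths survive
- # substitution into the JS template's string literals (notably on Windows)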
- replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
-
- with open(self._TMP_FILES['script'].name, 'wb') as f:
- f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
-
- if video_id is None:
- self.extractor.to_screen('%s' % (note2,))
- else:
- self.extractor.to_screen('%s: %s' % (video_id, note2))
-
- p = subprocess.Popen([
- self.exe, '--ssl-protocol=any',
- self._TMP_FILES['script'].name
- ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- out, err = p.communicate()
- if p.returncode != 0:
- raise ExtractorError(
- 'Executing JS failed:\n' + encodeArgument(err))
- with open(self._TMP_FILES['html'].name, 'rb') as f:
- html = f.read().decode('utf-8')
-
- self._load_cookies()
-
- return (html, encodeArgument(out))
-
-
-class OpenloadIE(InfoExtractor):
- _DOMAINS = r'''
- (?:
- openload\.(?:co|io|link|pw)|
- oload\.(?:tv|best|biz|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|online|monster|press|pw|life|live|space|services|website|vip)|
- oladblock\.(?:services|xyz|me)|openloed\.co
- )
- '''
- _VALID_URL = r'''(?x)
- https?://
- (?P<host>
- (?:www\.)?
- %s
- )/
- (?:f|embed)/
- (?P<id>[a-zA-Z0-9-_]+)
- ''' % _DOMAINS
- _EMBED_WORD = 'embed'
- _STREAM_WORD = 'f'
- _REDIR_WORD = 'stream'
- _URL_IDS = ('streamurl', 'streamuri', 'streamurj')
- _TESTS = [{
- 'url': 'https://openload.co/f/kUEfGclsU9o',
- 'md5': 'bf1c059b004ebc7a256f89408e65c36e',
- 'info_dict': {
- 'id': 'kUEfGclsU9o',
- 'ext': 'mp4',
- 'title': 'skyrim_no-audio_1080.mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- }, {
- 'url': 'https://openload.co/embed/rjC09fkPLYs',
- 'info_dict': {
- 'id': 'rjC09fkPLYs',
- 'ext': 'mp4',
- 'title': 'movie.mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'subtitles': {
- 'en': [{
- 'ext': 'vtt',
- }],
- },
- },
- 'params': {
- 'skip_download': True, # test subtitles only
- },
- }, {
- 'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4',
- 'only_matching': True,
- }, {
- 'url': 'https://openload.io/f/ZAn6oz-VZGE/',
- 'only_matching': True,
- }, {
- 'url': 'https://openload.co/f/_-ztPaZtMhM/',
- 'only_matching': True,
- }, {
- # unavailable via https://openload.co/f/Sxz5sADo82g/; the embed page uses
- # a different layout for title and ext
- 'url': 'https://openload.co/embed/Sxz5sADo82g/',
- 'only_matching': True,
- }, {
- # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available
- # via https://openload.co/f/e-Ixz9ZR5L0/
- 'url': 'https://openload.co/f/e-Ixz9ZR5L0/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.tv/embed/KnG-kKZdcfY/',
- 'only_matching': True,
- }, {
- 'url': 'http://www.openload.link/f/KnG-kKZdcfY',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.stream/f/KnG-kKZdcfY',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.xyz/f/WwRBpzW8Wtk',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.win/f/kUEfGclsU9o',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.download/f/kUEfGclsU9o',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.cloud/f/4ZDnBXRWiB8',
- 'only_matching': True,
- }, {
- # The title lacks the file extension, but the URL includes it
- 'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.cc/embed/5NEAbI2BDSk',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.icu/f/-_i4y_F_Hs8',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.fun/f/gb6G1H4sHXY',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.club/f/Nr1L-aZ2dbQ',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.info/f/5NEAbI2BDSk',
- 'only_matching': True,
- }, {
- 'url': 'https://openload.pw/f/WyKgK8s94N0',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.pw/f/WyKgK8s94N0',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.live/f/-Z58UZ-GR4M',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.space/f/IY4eZSst3u8/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.services/embed/bs1NWj1dCag/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.online/f/W8o2UfN1vNY/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.monster/f/W8o2UfN1vNY/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.press/embed/drTBl1aOTvk/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.website/embed/drTBl1aOTvk/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.life/embed/oOzZjNPw9Dc/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.biz/f/bEk3Gp8ARr4/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.best/embed/kkz9JgVZeWc/',
- 'only_matching': True,
- }, {
- 'url': 'https://oladblock.services/f/b8NWEgkqNLI/',
- 'only_matching': True,
- }, {
- 'url': 'https://oladblock.xyz/f/b8NWEgkqNLI/',
- 'only_matching': True,
- }, {
- 'url': 'https://oladblock.me/f/b8NWEgkqNLI/',
- 'only_matching': True,
- }, {
- 'url': 'https://openloed.co/f/b8NWEgkqNLI/',
- 'only_matching': True,
- }, {
- 'url': 'https://oload.vip/f/kUEfGclsU9o',
- 'only_matching': True,
- }]
-
- @classmethod
- def _extract_urls(cls, webpage):
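- # finds embedded players, e.g.
- # <iframe src="https://openload.co/embed/kUEfGclsU9o">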
- return re.findall(
- r'(?x)<iframe[^>]+src=["\']((?:https?://)?%s/%s/[a-zA-Z0-9-_]+)'
- % (cls._DOMAINS, cls._EMBED_WORD), webpage)
-
- def _extract_decrypted_page(self, page_url, webpage, video_id):
- phantom = PhantomJSwrapper(self, required_version='2.0')
- webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id)
- return webpage
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host')
- video_id = mobj.group('id')
-
- url_pattern = 'https://%s/%%s/%s/' % (host, video_id)
-
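- # try the embed page first and fall back to the stream page; only the
- # last attempt is fatal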
- for path in (self._EMBED_WORD, self._STREAM_WORD):
- page_url = url_pattern % path
- last = path == self._STREAM_WORD
- webpage = self._download_webpage(
- page_url, video_id, 'Downloading %s webpage' % path,
- fatal=last)
- if not webpage:
- continue
- if 'File not found' in webpage or 'deleted by the owner' in webpage:
- if not last:
- continue
- raise ExtractorError('File not found', expected=True, video_id=video_id)
- break
-
- webpage = self._extract_decrypted_page(page_url, webpage, video_id)
- for element_id in self._URL_IDS:
- decoded_id = get_element_by_id(element_id, webpage)
- if decoded_id:
- break
- if not decoded_id:
- decoded_id = self._search_regex(
- (r'>\s*([\w-]+~\d{10,}~\d+\.\d+\.0\.0~[\w-]+)\s*<',
- r'>\s*([\w~-]+~\d+\.\d+\.\d+\.\d+~[\w~-]+)',
- r'>\s*([\w-]+~\d{10,}~(?:[a-f\d]+:){2}:~[\w-]+)\s*<',
- r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)\s*<',
- r'>\s*([\w~-]+~[a-f0-9:]+~[\w~-]+)'), webpage,
- 'stream URL')
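- # the decoded ID redirects to the actual media, e.g.
- # https://openload.co/stream/<decoded_id>?mime=true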
- video_url = 'https://%s/%s/%s?mime=true' % (host, self._REDIR_WORD, decoded_id)
-
- title = self._og_search_title(webpage, default=None) or self._search_regex(
- r'<span[^>]+class=["\']title["\'][^>]*>([^<]+)', webpage,
- 'title', default=None) or self._html_search_meta(
- 'description', webpage, 'title', fatal=True)
-
- entries = self._parse_html5_media_entries(page_url, webpage, video_id)
- entry = entries[0] if entries else {}
- subtitles = entry.get('subtitles')
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': entry.get('thumbnail') or self._og_search_thumbnail(webpage, default=None),
- 'url': video_url,
- 'ext': determine_ext(title, None) or determine_ext(url, 'mp4'),
- 'subtitles': subtitles,
- }
-
-
-class VerystreamIE(OpenloadIE):
- IE_NAME = 'verystream'
-
- _DOMAINS = r'(?:verystream\.com|woof\.tube)'
- _VALID_URL = r'''(?x)
- https?://
- (?P<host>
- (?:www\.)?
- %s
- )/
- (?:stream|e)/
- (?P<id>[a-zA-Z0-9-_]+)
- ''' % _DOMAINS
- _EMBED_WORD = 'e'
- _STREAM_WORD = 'stream'
- _REDIR_WORD = 'gettoken'
- _URL_IDS = ('videolink', )
- _TESTS = [{
- 'url': 'https://verystream.com/stream/c1GWQ9ngBBx/',
- 'md5': 'd3e8c5628ccb9970b65fd65269886795',
- 'info_dict': {
- 'id': 'c1GWQ9ngBBx',
- 'ext': 'mp4',
- 'title': 'Big Buck Bunny.mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- }, {
- 'url': 'https://verystream.com/e/c1GWQ9ngBBx/',
- 'only_matching': True,
- }]
-
- def _extract_decrypted_page(self, page_url, webpage, video_id):
- return webpage # for Verystream, the webpage is already decrypted
diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py
deleted file mode 100644
index 3425f7602..000000000
--- a/youtube_dl/extractor/orf.py
+++ /dev/null
@@ -1,427 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- determine_ext,
- float_or_none,
- HEADRequest,
- int_or_none,
- orderedSet,
- remove_end,
- strip_jsonp,
- unescapeHTML,
- unified_strdate,
- url_or_none,
-)
-
-
-class ORFTVthekIE(InfoExtractor):
- IE_NAME = 'orf:tvthek'
- IE_DESC = 'ORF TVthek'
- _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
- 'playlist': [{
- 'md5': '2942210346ed779588f428a92db88712',
- 'info_dict': {
- 'id': '8896777',
- 'ext': 'mp4',
- 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
- 'description': 'md5:c1272f0245537812d4e36419c207b67d',
- 'duration': 2668,
- 'upload_date': '20141208',
- },
- }],
- 'skip': 'Blocked outside of Austria / Germany',
- }, {
- 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
- 'info_dict': {
- 'id': '7982259',
- 'ext': 'mp4',
- 'title': 'Best of Ingrid Thurnher',
- 'upload_date': '20140527',
- 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
- },
- 'params': {
- 'skip_download': True, # rtsp downloads
- },
- 'skip': 'Blocked outside of Austria / Germany',
- }, {
- 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
- 'only_matching': True,
- }, {
- 'url': 'http://tvthek.orf.at/profile/Universum/35429',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
-
- data_jsb = self._parse_json(
- self._search_regex(
- r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
- webpage, 'playlist', group='json'),
- playlist_id, transform_source=unescapeHTML)['playlist']['videos']
-
- entries = []
- for sd in data_jsb:
- video_id, title = sd.get('id'), sd.get('title')
- if not video_id or not title:
- continue
- video_id = compat_str(video_id)
- formats = []
- for fd in sd['sources']:
- src = url_or_none(fd.get('src'))
- if not src:
- continue
- format_id_list = []
- for key in ('delivery', 'quality', 'quality_string'):
- value = fd.get(key)
- if value:
- format_id_list.append(value)
- format_id = '-'.join(format_id_list)
- ext = determine_ext(src)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- src, video_id, 'mp4', m3u8_id=format_id, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- src, video_id, f4m_id=format_id, fatal=False))
- else:
- formats.append({
- 'format_id': format_id,
- 'url': src,
- 'protocol': fd.get('protocol'),
- })
-
- # Check for geoblocking.
- # There is an is_geoprotection property, but it is always false.
- geo_str = sd.get('geoprotection_string')
- if geo_str:
- try:
- http_url = next(
- f['url']
- for f in formats
- if re.match(r'^https?://.*\.mp4$', f['url']))
- except StopIteration:
- pass
- else:
- req = HEADRequest(http_url)
- self._request_webpage(
- req, video_id,
- note='Testing for geoblocking',
- errnote=((
- 'This video seems to be blocked outside of %s. '
- 'You may want to try the streaming-* formats.')
- % geo_str),
- fatal=False)
-
- self._check_formats(formats, video_id)
- self._sort_formats(formats)
-
- subtitles = {}
- for sub in sd.get('subtitles', []):
- sub_src = sub.get('src')
- if not sub_src:
- continue
- subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
- 'url': sub_src,
- })
-
- upload_date = unified_strdate(sd.get('created_date'))
- entries.append({
- '_type': 'video',
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'subtitles': subtitles,
- 'description': sd.get('description'),
- 'duration': int_or_none(sd.get('duration_in_seconds')),
- 'upload_date': upload_date,
- 'thumbnail': sd.get('image_full_url'),
- })
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'id': playlist_id,
- }
-
-
-class ORFRadioIE(InfoExtractor):
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- station = mobj.group('station')
- show_date = mobj.group('date')
- show_id = mobj.group('show')
-
- if station == 'fm4':
- show_id = '4%s' % show_id
-
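- # e.g. http://fm4.orf.at/player/20170107/CC maps to
- # http://audioapi.orf.at/fm4/api/json/current/broadcast/4CC/20170107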
- data = self._download_json(
- 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s' % (station, show_id, show_date),
- show_id
- )
-
- def extract_entry_dict(info, title, subtitle):
- return {
- 'id': info['loopStreamId'].replace('.mp3', ''),
- 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (station, info['loopStreamId']),
- 'title': title,
- 'description': subtitle,
- 'duration': (info['end'] - info['start']) / 1000,
- 'timestamp': info['start'] / 1000,
- 'ext': 'mp3',
- 'series': data.get('programTitle')
- }
-
- entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']]
-
- return {
- '_type': 'playlist',
- 'id': show_id,
- 'title': data['title'],
- 'description': data['subtitle'],
- 'entries': entries
- }
-
-
-class ORFFM4IE(ORFRadioIE):
- IE_NAME = 'orf:fm4'
- IE_DESC = 'radio FM4'
- _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
-
- _TEST = {
- 'url': 'http://fm4.orf.at/player/20170107/CC',
- 'md5': '2b0be47375432a7ef104453432a19212',
- 'info_dict': {
- 'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
- 'ext': 'mp3',
- 'title': 'Solid Steel Radioshow',
- 'description': 'Die Mixshow von Coldcut und Ninja Tune.',
- 'duration': 3599,
- 'timestamp': 1483819257,
- 'upload_date': '20170107',
- },
- 'skip': 'Shows from ORF radios are only available for 7 days.'
- }
-
-
-class ORFOE1IE(ORFRadioIE):
- IE_NAME = 'orf:oe1'
- IE_DESC = 'Radio Österreich 1'
- _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
-
- _TEST = {
- 'url': 'http://oe1.orf.at/player/20170108/456544',
- 'md5': '34d8a6e67ea888293741c86a099b745b',
- 'info_dict': {
- 'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
- 'ext': 'mp3',
- 'title': 'Morgenjournal',
- 'duration': 609,
- 'timestamp': 1483858796,
- 'upload_date': '20170108',
- },
- 'skip': 'Shows from ORF radios are only available for 7 days.'
- }
-
-
-class ORFIPTVIE(InfoExtractor):
- IE_NAME = 'orf:iptv'
- IE_DESC = 'iptv.ORF.at'
- _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://iptv.orf.at/stories/2275236/',
- 'md5': 'c8b22af4718a4b4af58342529453e3e5',
- 'info_dict': {
- 'id': '350612',
- 'ext': 'flv',
- 'title': 'Weitere Evakuierungen um Vulkan Calbuco',
- 'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
- 'duration': 68.197,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20150425',
- },
- }
-
- def _real_extract(self, url):
- story_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'http://iptv.orf.at/stories/%s' % story_id, story_id)
-
- video_id = self._search_regex(
- r'data-video(?:id)?="(\d+)"', webpage, 'video id')
-
- data = self._download_json(
- 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
- video_id)[0]
-
- duration = float_or_none(data['duration'], 1000)
-
- video = data['sources']['default']
- load_balancer_url = video['loadBalancerUrl']
- abr = int_or_none(video.get('audioBitrate'))
- vbr = int_or_none(video.get('bitrate'))
- fps = int_or_none(video.get('videoFps'))
- width = int_or_none(video.get('videoWidth'))
- height = int_or_none(video.get('videoHeight'))
- thumbnail = video.get('preview')
-
- rendition = self._download_json(
- load_balancer_url, video_id, transform_source=strip_jsonp)
-
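- # properties shared by all formats; the RTMP branch below copies this
- # dict and adds the stream URL, while f4m/m3u8 manifests expand into
- # their own formats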
- f = {
- 'abr': abr,
- 'vbr': vbr,
- 'fps': fps,
- 'width': width,
- 'height': height,
- }
-
- formats = []
- for format_id, format_url in rendition['redirect'].items():
- if format_id == 'rtmp':
- ff = f.copy()
- ff.update({
- 'url': format_url,
- 'format_id': format_id,
- })
- formats.append(ff)
- elif determine_ext(format_url) == 'f4m':
- formats.extend(self._extract_f4m_formats(
- format_url, video_id, f4m_id=format_id))
- elif determine_ext(format_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id=format_id))
- else:
- continue
- self._sort_formats(formats)
-
- title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
- description = self._og_search_description(webpage)
- upload_date = unified_strdate(self._html_search_meta(
- 'dc.date', webpage, 'upload date'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'formats': formats,
- }
-
-
-class ORFFM4StoryIE(InfoExtractor):
- IE_NAME = 'orf:fm4:story'
- IE_DESC = 'fm4.orf.at stories'
- _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://fm4.orf.at/stories/2865738/',
- 'playlist': [{
- 'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
- 'info_dict': {
- 'id': '547792',
- 'ext': 'flv',
- 'title': 'Manu Delago und Inner Tongue live',
- 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
- 'duration': 1748.52,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20170913',
- },
- }, {
- 'md5': 'c6dd2179731f86f4f55a7b49899d515f',
- 'info_dict': {
- 'id': '547798',
- 'ext': 'flv',
- 'title': 'Manu Delago und Inner Tongue live (2)',
- 'duration': 1504.08,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'upload_date': '20170913',
- 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
- },
- }],
- }
-
- def _real_extract(self, url):
- story_id = self._match_id(url)
- webpage = self._download_webpage(url, story_id)
-
- entries = []
- all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
- for idx, video_id in enumerate(all_ids):
- data = self._download_json(
- 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
- video_id)[0]
-
- duration = float_or_none(data['duration'], 1000)
-
- video = data['sources']['q8c']
- load_balancer_url = video['loadBalancerUrl']
- abr = int_or_none(video.get('audioBitrate'))
- vbr = int_or_none(video.get('bitrate'))
- fps = int_or_none(video.get('videoFps'))
- width = int_or_none(video.get('videoWidth'))
- height = int_or_none(video.get('videoHeight'))
- thumbnail = video.get('preview')
-
- rendition = self._download_json(
- load_balancer_url, video_id, transform_source=strip_jsonp)
-
- f = {
- 'abr': abr,
- 'vbr': vbr,
- 'fps': fps,
- 'width': width,
- 'height': height,
- }
-
- formats = []
- for format_id, format_url in rendition['redirect'].items():
- if format_id == 'rtmp':
- ff = f.copy()
- ff.update({
- 'url': format_url,
- 'format_id': format_id,
- })
- formats.append(ff)
- elif determine_ext(format_url) == 'f4m':
- formats.extend(self._extract_f4m_formats(
- format_url, video_id, f4m_id=format_id))
- elif determine_ext(format_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id=format_id))
- else:
- continue
- self._sort_formats(formats)
-
- title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
- if idx >= 1:
- # Titles are duplicates, make them unique
- title += ' (' + str(idx + 1) + ')'
- description = self._og_search_description(webpage)
- upload_date = unified_strdate(self._html_search_meta(
- 'dc.date', webpage, 'upload date'))
-
- entries.append({
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'formats': formats,
- })
-
- return self.playlist_result(entries)
diff --git a/youtube_dl/extractor/pandatv.py b/youtube_dl/extractor/pandatv.py
deleted file mode 100644
index 4219802d5..000000000
--- a/youtube_dl/extractor/pandatv.py
+++ /dev/null
@@ -1,99 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- qualities,
-)
-
-
-class PandaTVIE(InfoExtractor):
- IE_DESC = '熊猫TV'
- _VALID_URL = r'https?://(?:www\.)?panda\.tv/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.panda.tv/66666',
- 'info_dict': {
- 'id': '66666',
- 'title': 're:.+',
- 'uploader': '刘杀鸡',
- 'ext': 'flv',
- 'is_live': True,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Live stream is offline',
- }, {
- 'url': 'https://www.panda.tv/66666',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- config = self._download_json(
- 'https://www.panda.tv/api_room_v2?roomid=%s' % video_id, video_id)
-
- error_code = config.get('errno', 0)
- if error_code != 0:
- raise ExtractorError(
- '%s returned error %s: %s'
- % (self.IE_NAME, error_code, config['errmsg']),
- expected=True)
-
- data = config['data']
- video_info = data['videoinfo']
-
- # 2 = live, 3 = offline
- if video_info.get('status') != '2':
- raise ExtractorError(
- 'Live stream is offline', expected=True)
-
- title = data['roominfo']['name']
- uploader = data.get('hostinfo', {}).get('name')
- room_key = video_info['room_key']
- stream_addr = video_info.get(
- 'stream_addr', {'OD': '1', 'HD': '1', 'SD': '1'})
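- # stream_addr flags which qualities are available ('1' = available)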
-
- # Reverse engineered from web player swf
- # (http://s6.pdim.gs/static/07153e425f581151.swf at the moment of
- # writing).
- plflag0, plflag1 = video_info['plflag'].split('_')
- plflag0 = int(plflag0) - 1
- if plflag1 == '21':
- plflag0 = 10
- plflag1 = '4'
- live_panda = 'live_panda' if plflag0 < 1 else ''
-
- plflag_auth = self._parse_json(video_info['plflag_list'], video_id)
- sign = plflag_auth['auth']['sign']
- ts = plflag_auth['auth']['time']
- rid = plflag_auth['auth']['rid']
-
- quality_key = qualities(['OD', 'HD', 'SD'])
- suffix = ['_small', '_mid', '']
- formats = []
- for k, v in stream_addr.items():
- if v != '1':
- continue
- quality = quality_key(k)
- if quality <= 0:
- continue
- for pref, (ext, pl) in enumerate((('m3u8', '-hls'), ('flv', ''))):
- formats.append({
- 'url': 'https://pl%s%s.live.panda.tv/live_panda/%s%s%s.%s?sign=%s&ts=%s&rid=%s'
- % (pl, plflag1, room_key, live_panda, suffix[quality], ext, sign, ts, rid),
- 'format_id': '%s-%s' % (k, ext),
- 'quality': quality,
- 'source_preference': pref,
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': self._live_title(title),
- 'uploader': uploader,
- 'formats': formats,
- 'is_live': True,
- }
diff --git a/youtube_dl/extractor/patreon.py b/youtube_dl/extractor/patreon.py
deleted file mode 100644
index 426dd8121..000000000
--- a/youtube_dl/extractor/patreon.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- determine_ext,
- int_or_none,
- parse_iso8601,
-)
-
-
-class PatreonIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://www.patreon.com/creation?hid=743933',
- 'md5': 'e25505eec1053a6e6813b8ed369875cc',
- 'info_dict': {
- 'id': '743933',
- 'ext': 'mp3',
- 'title': 'Episode 166: David Smalley of Dogma Debate',
- 'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
- 'uploader': 'Cognitive Dissonance Podcast',
- 'thumbnail': 're:^https?://.*$',
- 'timestamp': 1406473987,
- 'upload_date': '20140727',
- },
- }, {
- 'url': 'http://www.patreon.com/creation?hid=754133',
- 'md5': '3eb09345bf44bf60451b8b0b81759d0a',
- 'info_dict': {
- 'id': '754133',
- 'ext': 'mp3',
- 'title': 'CD 167 Extra',
- 'uploader': 'Cognitive Dissonance Podcast',
- 'thumbnail': 're:^https?://.*$',
- },
- 'skip': 'Patron-only content',
- }, {
- 'url': 'https://www.patreon.com/creation?hid=1682498',
- 'info_dict': {
- 'id': 'SU4fj_aEMVw',
- 'ext': 'mp4',
- 'title': 'I\'m on Patreon!',
- 'uploader': 'TraciJHines',
- 'thumbnail': 're:^https?://.*$',
- 'upload_date': '20150211',
- 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
- 'uploader_id': 'TraciJHines',
- },
- 'params': {
- 'noplaylist': True,
- 'skip_download': True,
- }
- }, {
- 'url': 'https://www.patreon.com/posts/episode-166-of-743933',
- 'only_matching': True,
- }, {
- 'url': 'https://www.patreon.com/posts/743933',
- 'only_matching': True,
- }]
-
- # Currently Patreon exposes the download URL via hidden CSS, so login is
- # not needed. Keeping this commented for when this inevitably changes.
- '''
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_form = {
- 'redirectUrl': 'http://www.patreon.com/',
- 'email': username,
- 'password': password,
- }
-
- request = sanitized_Request(
- 'https://www.patreon.com/processLogin',
- compat_urllib_parse_urlencode(login_form).encode('utf-8')
- )
- login_page = self._download_webpage(request, None, note='Logging in')
-
- if re.search(r'onLoginFailed', login_page):
- raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
-
- def _real_initialize(self):
- self._login()
- '''
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- post = self._download_json(
- 'https://www.patreon.com/api/posts/' + video_id, video_id)
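- # the endpoint returns a JSON:API-style document: the post itself under
- # data.attributes, related resources (files, user) under included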
- attributes = post['data']['attributes']
- title = attributes['title'].strip()
- image = attributes.get('image') or {}
- info = {
- 'id': video_id,
- 'title': title,
- 'description': clean_html(attributes.get('content')),
- 'thumbnail': image.get('large_url') or image.get('url'),
- 'timestamp': parse_iso8601(attributes.get('published_at')),
- 'like_count': int_or_none(attributes.get('like_count')),
- 'comment_count': int_or_none(attributes.get('comment_count')),
- }
-
- def add_file(file_data):
- file_url = file_data.get('url')
- if file_url:
- info.update({
- 'url': file_url,
- 'ext': determine_ext(file_data.get('name'), 'mp3'),
- })
-
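- # prefer an explicit attachment; fall back to the post's own file, then
- # to an embedded URL handled by another extractor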
- for i in post.get('included', []):
- i_type = i.get('type')
- if i_type == 'attachment':
- add_file(i.get('attributes') or {})
- elif i_type == 'user':
- user_attributes = i.get('attributes')
- if user_attributes:
- info.update({
- 'uploader': user_attributes.get('full_name'),
- 'uploader_url': user_attributes.get('url'),
- })
-
- if not info.get('url'):
- add_file(attributes.get('post_file') or {})
-
- if not info.get('url'):
- info.update({
- '_type': 'url',
- 'url': attributes['embed']['url'],
- })
-
- return info
diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py
deleted file mode 100644
index d3a83ea2b..000000000
--- a/youtube_dl/extractor/peertube.py
+++ /dev/null
@@ -1,547 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- parse_resolution,
- try_get,
- unified_timestamp,
- url_or_none,
- urljoin,
-)
-
-
-class PeerTubeIE(InfoExtractor):
- _INSTANCES_RE = r'''(?:
- # Taken from https://instances.joinpeertube.org/instances
- peertube\.rainbowswingers\.net|
- tube\.stanisic\.nl|
- peer\.suiri\.us|
- medias\.libox\.fr|
- videomensoif\.ynh\.fr|
- peertube\.travelpandas\.eu|
- peertube\.rachetjay\.fr|
- peertube\.montecsys\.fr|
- tube\.eskuero\.me|
- peer\.tube|
- peertube\.umeahackerspace\.se|
- tube\.nx-pod\.de|
- video\.monsieurbidouille\.fr|
- tube\.openalgeria\.org|
- vid\.lelux\.fi|
- video\.anormallostpod\.ovh|
- tube\.crapaud-fou\.org|
- peertube\.stemy\.me|
- lostpod\.space|
- exode\.me|
- peertube\.snargol\.com|
- vis\.ion\.ovh|
- videosdulib\.re|
- v\.mbius\.io|
- videos\.judrey\.eu|
- peertube\.osureplayviewer\.xyz|
- peertube\.mathieufamily\.ovh|
- www\.videos-libr\.es|
- fightforinfo\.com|
- peertube\.fediverse\.ru|
- peertube\.oiseauroch\.fr|
- video\.nesven\.eu|
- v\.bearvideo\.win|
- video\.qoto\.org|
- justporn\.cc|
- video\.vny\.fr|
- peervideo\.club|
- tube\.taker\.fr|
- peertube\.chantierlibre\.org|
- tube\.ipfixe\.info|
- tube\.kicou\.info|
- tube\.dodsorf\.as|
- videobit\.cc|
- video\.yukari\.moe|
- videos\.elbinario\.net|
- hkvideo\.live|
- pt\.tux\.tf|
- www\.hkvideo\.live|
- FIGHTFORINFO\.com|
- pt\.765racing\.com|
- peertube\.gnumeria\.eu\.org|
- nordenmedia\.com|
- peertube\.co\.uk|
- tube\.darfweb\.eu|
- tube\.kalah-france\.org|
- 0ch\.in|
- vod\.mochi\.academy|
- film\.node9\.org|
- peertube\.hatthieves\.es|
- video\.fitchfamily\.org|
- peertube\.ddns\.net|
- video\.ifuncle\.kr|
- video\.fdlibre\.eu|
- tube\.22decembre\.eu|
- peertube\.harmoniescreatives\.com|
- tube\.fabrigli\.fr|
- video\.thedwyers\.co|
- video\.bruitbruit\.com|
- peertube\.foxfam\.club|
- peer\.philoxweb\.be|
- videos\.bugs\.social|
- peertube\.malbert\.xyz|
- peertube\.bilange\.ca|
- libretube\.net|
- diytelevision\.com|
- peertube\.fedilab\.app|
- libre\.video|
- video\.mstddntfdn\.online|
- us\.tv|
- peertube\.sl-network\.fr|
- peertube\.dynlinux\.io|
- peertube\.david\.durieux\.family|
- peertube\.linuxrocks\.online|
- peerwatch\.xyz|
- v\.kretschmann\.social|
- tube\.otter\.sh|
- yt\.is\.nota\.live|
- tube\.dragonpsi\.xyz|
- peertube\.boneheadmedia\.com|
- videos\.funkwhale\.audio|
- watch\.44con\.com|
- peertube\.gcaillaut\.fr|
- peertube\.icu|
- pony\.tube|
- spacepub\.space|
- tube\.stbr\.io|
- v\.mom-gay\.faith|
- tube\.port0\.xyz|
- peertube\.simounet\.net|
- play\.jergefelt\.se|
- peertube\.zeteo\.me|
- tube\.danq\.me|
- peertube\.kerenon\.com|
- tube\.fab-l3\.org|
- tube\.calculate\.social|
- peertube\.mckillop\.org|
- tube\.netzspielplatz\.de|
- vod\.ksite\.de|
- peertube\.laas\.fr|
- tube\.govital\.net|
- peertube\.stephenson\.cc|
- bistule\.nohost\.me|
- peertube\.kajalinifi\.de|
- video\.ploud\.jp|
- video\.omniatv\.com|
- peertube\.ffs2play\.fr|
- peertube\.leboulaire\.ovh|
- peertube\.tronic-studio\.com|
- peertube\.public\.cat|
- peertube\.metalbanana\.net|
- video\.1000i100\.fr|
- peertube\.alter-nativ-voll\.de|
- tube\.pasa\.tf|
- tube\.worldofhauru\.xyz|
- pt\.kamp\.site|
- peertube\.teleassist\.fr|
- videos\.mleduc\.xyz|
- conf\.tube|
- media\.privacyinternational\.org|
- pt\.forty-two\.nl|
- video\.halle-leaks\.de|
- video\.grosskopfgames\.de|
- peertube\.schaeferit\.de|
- peertube\.jackbot\.fr|
- tube\.extinctionrebellion\.fr|
- peertube\.f-si\.org|
- video\.subak\.ovh|
- videos\.koweb\.fr|
- peertube\.zergy\.net|
- peertube\.roflcopter\.fr|
- peertube\.floss-marketing-school\.com|
- vloggers\.social|
- peertube\.iriseden\.eu|
- videos\.ubuntu-paris\.org|
- peertube\.mastodon\.host|
- armstube\.com|
- peertube\.s2s\.video|
- peertube\.lol|
- tube\.open-plug\.eu|
- open\.tube|
- peertube\.ch|
- peertube\.normandie-libre\.fr|
- peertube\.slat\.org|
- video\.lacaveatonton\.ovh|
- peertube\.uno|
- peertube\.servebeer\.com|
- peertube\.fedi\.quebec|
- tube\.h3z\.jp|
- tube\.plus200\.com|
- peertube\.eric\.ovh|
- tube\.metadocs\.cc|
- tube\.unmondemeilleur\.eu|
- gouttedeau\.space|
- video\.antirep\.net|
- nrop\.cant\.at|
- tube\.ksl-bmx\.de|
- tube\.plaf\.fr|
- tube\.tchncs\.de|
- video\.devinberg\.com|
- hitchtube\.fr|
- peertube\.kosebamse\.com|
- yunopeertube\.myddns\.me|
- peertube\.varney\.fr|
- peertube\.anon-kenkai\.com|
- tube\.maiti\.info|
- tubee\.fr|
- videos\.dinofly\.com|
- toobnix\.org|
- videotape\.me|
- voca\.tube|
- video\.heromuster\.com|
- video\.lemediatv\.fr|
- video\.up\.edu\.ph|
- balafon\.video|
- video\.ivel\.fr|
- thickrips\.cloud|
- pt\.laurentkruger\.fr|
- video\.monarch-pass\.net|
- peertube\.artica\.center|
- video\.alternanet\.fr|
- indymotion\.fr|
- fanvid\.stopthatimp\.net|
- video\.farci\.org|
- v\.lesterpig\.com|
- video\.okaris\.de|
- tube\.pawelko\.net|
- peertube\.mablr\.org|
- tube\.fede\.re|
- pytu\.be|
- evertron\.tv|
- devtube\.dev-wiki\.de|
- raptube\.antipub\.org|
- video\.selea\.se|
- peertube\.mygaia\.org|
- video\.oh14\.de|
- peertube\.livingutopia\.org|
- peertube\.the-penguin\.de|
- tube\.thechangebook\.org|
- tube\.anjara\.eu|
- pt\.pube\.tk|
- video\.samedi\.pm|
- mplayer\.demouliere\.eu|
- widemus\.de|
- peertube\.me|
- peertube\.zapashcanon\.fr|
- video\.latavernedejohnjohn\.fr|
- peertube\.pcservice46\.fr|
- peertube\.mazzonetto\.eu|
- video\.irem\.univ-paris-diderot\.fr|
- video\.livecchi\.cloud|
- alttube\.fr|
- video\.coop\.tools|
- video\.cabane-libre\.org|
- peertube\.openstreetmap\.fr|
- videos\.alolise\.org|
- irrsinn\.video|
- video\.antopie\.org|
- scitech\.video|
- tube2\.nemsia\.org|
- video\.amic37\.fr|
- peertube\.freeforge\.eu|
- video\.arbitrarion\.com|
- video\.datsemultimedia\.com|
- stoptrackingus\.tv|
- peertube\.ricostrongxxx\.com|
- docker\.videos\.lecygnenoir\.info|
- peertube\.togart\.de|
- tube\.postblue\.info|
- videos\.domainepublic\.net|
- peertube\.cyber-tribal\.com|
- video\.gresille\.org|
- peertube\.dsmouse\.net|
- cinema\.yunohost\.support|
- tube\.theocevaer\.fr|
- repro\.video|
- tube\.4aem\.com|
- quaziinc\.com|
- peertube\.metawurst\.space|
- videos\.wakapo\.com|
- video\.ploud\.fr|
- video\.freeradical\.zone|
- tube\.valinor\.fr|
- refuznik\.video|
- pt\.kircheneuenburg\.de|
- peertube\.asrun\.eu|
- peertube\.lagob\.fr|
- videos\.side-ways\.net|
- 91video\.online|
- video\.valme\.io|
- video\.taboulisme\.com|
- videos-libr\.es|
- tv\.mooh\.fr|
- nuage\.acostey\.fr|
- video\.monsieur-a\.fr|
- peertube\.librelois\.fr|
- videos\.pair2jeux\.tube|
- videos\.pueseso\.club|
- peer\.mathdacloud\.ovh|
- media\.assassinate-you\.net|
- vidcommons\.org|
- ptube\.rousset\.nom\.fr|
- tube\.cyano\.at|
- videos\.squat\.net|
- video\.iphodase\.fr|
- peertube\.makotoworkshop\.org|
- peertube\.serveur\.slv-valbonne\.fr|
- vault\.mle\.party|
- hostyour\.tv|
- videos\.hack2g2\.fr|
- libre\.tube|
- pire\.artisanlogiciel\.net|
- videos\.numerique-en-commun\.fr|
- video\.netsyms\.com|
- video\.die-partei\.social|
- video\.writeas\.org|
- peertube\.swarm\.solvingmaz\.es|
- tube\.pericoloso\.ovh|
- watching\.cypherpunk\.observer|
- videos\.adhocmusic\.com|
- tube\.rfc1149\.net|
- peertube\.librelabucm\.org|
- videos\.numericoop\.fr|
- peertube\.koehn\.com|
- peertube\.anarchmusicall\.net|
- tube\.kampftoast\.de|
- vid\.y-y\.li|
- peertube\.xtenz\.xyz|
- diode\.zone|
- tube\.egf\.mn|
- peertube\.nomagic\.uk|
- visionon\.tv|
- videos\.koumoul\.com|
- video\.rastapuls\.com|
- video\.mantlepro\.com|
- video\.deadsuperhero\.com|
- peertube\.musicstudio\.pro|
- peertube\.we-keys\.fr|
- artitube\.artifaille\.fr|
- peertube\.ethernia\.net|
- tube\.midov\.pl|
- peertube\.fr|
- watch\.snoot\.tube|
- peertube\.donnadieu\.fr|
- argos\.aquilenet\.fr|
- tube\.nemsia\.org|
- tube\.bruniau\.net|
- videos\.darckoune\.moe|
- tube\.traydent\.info|
- dev\.videos\.lecygnenoir\.info|
- peertube\.nayya\.org|
- peertube\.live|
- peertube\.mofgao\.space|
- video\.lequerrec\.eu|
- peertube\.amicale\.net|
- aperi\.tube|
- tube\.ac-lyon\.fr|
- video\.lw1\.at|
- www\.yiny\.org|
- videos\.pofilo\.fr|
- tube\.lou\.lt|
- choob\.h\.etbus\.ch|
- tube\.hoga\.fr|
- peertube\.heberge\.fr|
- video\.obermui\.de|
- videos\.cloudfrancois\.fr|
- betamax\.video|
- video\.typica\.us|
- tube\.piweb\.be|
- video\.blender\.org|
- peertube\.cat|
- tube\.kdy\.ch|
- pe\.ertu\.be|
- peertube\.social|
- videos\.lescommuns\.org|
- tv\.datamol\.org|
- videonaute\.fr|
- dialup\.express|
- peertube\.nogafa\.org|
- megatube\.lilomoino\.fr|
- peertube\.tamanoir\.foucry\.net|
- peertube\.devosi\.org|
- peertube\.1312\.media|
- tube\.bootlicker\.party|
- skeptikon\.fr|
- video\.blueline\.mg|
- tube\.homecomputing\.fr|
- tube\.ouahpiti\.info|
- video\.tedomum\.net|
- video\.g3l\.org|
- fontube\.fr|
- peertube\.gaialabs\.ch|
- tube\.kher\.nl|
- peertube\.qtg\.fr|
- video\.migennes\.net|
- tube\.p2p\.legal|
- troll\.tv|
- videos\.iut-orsay\.fr|
- peertube\.solidev\.net|
- videos\.cemea\.org|
- video\.passageenseine\.fr|
- videos\.festivalparminous\.org|
- peertube\.touhoppai\.moe|
- sikke\.fi|
- peer\.hostux\.social|
- share\.tube|
- peertube\.walkingmountains\.fr|
- videos\.benpro\.fr|
- peertube\.parleur\.net|
- peertube\.heraut\.eu|
- tube\.aquilenet\.fr|
- peertube\.gegeweb\.eu|
- framatube\.org|
- thinkerview\.video|
- tube\.conferences-gesticulees\.net|
- peertube\.datagueule\.tv|
- video\.lqdn\.fr|
- tube\.mochi\.academy|
- media\.zat\.im|
- video\.colibris-outilslibres\.org|
- tube\.svnet\.fr|
- peertube\.video|
- peertube3\.cpy\.re|
- peertube2\.cpy\.re|
- videos\.tcit\.fr|
- peertube\.cpy\.re
- )'''
- _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
- _VALID_URL = r'''(?x)
- (?:
- peertube:(?P<host>[^:]+):|
- https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
- )
- (?P<id>%s)
- ''' % (_INSTANCES_RE, _UUID_RE)
- _TESTS = [{
- 'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
- 'md5': '80f24ff364cc9d333529506a263e7feb',
- 'info_dict': {
- 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
- 'ext': 'mp4',
- 'title': 'wow',
- 'description': 'wow such video, so gif',
- 'thumbnail': r're:https?://.*\.(?:jpg|png)',
- 'timestamp': 1519297480,
- 'upload_date': '20180222',
- 'uploader': 'Luclu7',
- 'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
- 'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7',
- 'license': 'Unknown',
- 'duration': 3,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'tags': list,
- 'categories': list,
- }
- }, {
- 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
- 'only_matching': True,
- }, {
- # nsfw
- 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
- 'only_matching': True,
- }, {
- 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
- 'only_matching': True,
- }, {
- 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
- 'only_matching': True,
- }, {
- 'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_peertube_url(webpage, source_url):
- mobj = re.match(
- r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
- % PeerTubeIE._UUID_RE, source_url)
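- # The URL shape alone is not conclusive, so additionally require a
- # PeerTube-specific marker in the page markup before claiming the URL.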
- if mobj and any(p in webpage for p in (
- '<title>PeerTube<',
- 'There will be other non JS-based clients to access PeerTube',
- '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
- return 'peertube:%s:%s' % mobj.group('host', 'id')
-
- @staticmethod
- def _extract_urls(webpage, source_url):
- entries = re.findall(
- r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
- % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
- if not entries:
- peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
- if peertube_url:
- entries = [peertube_url]
- return entries
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host') or mobj.group('host_2')
- video_id = mobj.group('id')
-
- video = self._download_json(
- 'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
-
- title = video['name']
-
- formats = []
- for file_ in video['files']:
- if not isinstance(file_, dict):
- continue
- file_url = url_or_none(file_.get('fileUrl'))
- if not file_url:
- continue
- file_size = int_or_none(file_.get('size'))
- format_id = try_get(
- file_, lambda x: x['resolution']['label'], compat_str)
- f = parse_resolution(format_id)
- f.update({
- 'url': file_url,
- 'format_id': format_id,
- 'filesize': file_size,
- })
- formats.append(f)
- self._sort_formats(formats)
-
- def account_data(field):
- return try_get(video, lambda x: x['account'][field], compat_str)
-
- category = try_get(video, lambda x: x['category']['label'], compat_str)
- categories = [category] if category else None
-
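- # The API reports nsfw as a boolean; if the field is missing or has an
- # unexpected type, leave age_limit unset rather than assuming 0.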
- nsfw = video.get('nsfw')
- if isinstance(nsfw, bool):
- age_limit = 18 if nsfw else 0
- else:
- age_limit = None
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video.get('description'),
- 'thumbnail': urljoin(url, video.get('thumbnailPath')),
- 'timestamp': unified_timestamp(video.get('publishedAt')),
- 'uploader': account_data('displayName'),
- 'uploader_id': account_data('uuid'),
- 'uploader_url': account_data('url'),
- 'license': try_get(
- video, lambda x: x['licence']['label'], compat_str),
- 'duration': int_or_none(video.get('duration')),
- 'view_count': int_or_none(video.get('views')),
- 'like_count': int_or_none(video.get('likes')),
- 'dislike_count': int_or_none(video.get('dislikes')),
- 'age_limit': age_limit,
- 'tags': try_get(video, lambda x: x['tags'], list),
- 'categories': categories,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/periscope.py b/youtube_dl/extractor/periscope.py
deleted file mode 100644
index b337a56c0..000000000
--- a/youtube_dl/extractor/periscope.py
+++ /dev/null
@@ -1,171 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_iso8601,
- unescapeHTML,
-)
-
-
-class PeriscopeBaseIE(InfoExtractor):
- def _call_api(self, method, query, item_id):
- return self._download_json(
- 'https://api.periscope.tv/api/v2/%s' % method,
- item_id, query=query)
-
-
-class PeriscopeIE(PeriscopeBaseIE):
- IE_DESC = 'Periscope'
- IE_NAME = 'periscope'
- _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
- # Live example URLs can be found at http://onperiscope.com/
- _TESTS = [{
- 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
- 'md5': '65b57957972e503fcbbaeed8f4fa04ca',
- 'info_dict': {
- 'id': '56102209',
- 'ext': 'mp4',
- 'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
- 'timestamp': 1438978559,
- 'upload_date': '20150807',
- 'uploader': 'Bec Boop',
- 'uploader_id': '1465763',
- },
- 'skip': 'Expires in 24 hours',
- }, {
- 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
- 'only_matching': True,
- }, {
- 'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage)
- if mobj:
- return mobj.group('url')
-
- def _real_extract(self, url):
- token = self._match_id(url)
-
- stream = self._call_api(
- 'accessVideoPublic', {'broadcast_id': token}, token)
-
- broadcast = stream['broadcast']
- title = broadcast['status']
-
- uploader = broadcast.get('user_display_name') or broadcast.get('username')
- uploader_id = (broadcast.get('user_id') or broadcast.get('username'))
-
- title = '%s - %s' % (uploader, title) if uploader else title
- state = (broadcast.get('state') or '').lower()
- if state == 'running':
- title = self._live_title(title)
- timestamp = parse_iso8601(broadcast.get('created_at'))
-
- thumbnails = [{
- 'url': broadcast[image],
- } for image in ('image_url', 'image_url_small') if broadcast.get(image)]
-
- width = int_or_none(broadcast.get('width'))
- height = int_or_none(broadcast.get('height'))
-
- def add_width_and_height(f):
- for key, val in (('width', width), ('height', height)):
- if not f.get(key):
- f[key] = val
-
- video_urls = set()
- formats = []
- for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
- video_url = stream.get(format_id + '_url')
- if not video_url or video_url in video_urls:
- continue
- video_urls.add(video_url)
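- # Finished broadcasts serve a static playlist, so the native HLS
- # downloader can be used; live streams still go through ffmpeg's
- # m3u8 protocol.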
- if format_id != 'rtmp':
- m3u8_formats = self._extract_m3u8_formats(
- video_url, token, 'mp4',
- entry_protocol='m3u8_native'
- if state in ('ended', 'timed_out') else 'm3u8',
- m3u8_id=format_id, fatal=False)
- if len(m3u8_formats) == 1:
- add_width_and_height(m3u8_formats[0])
- formats.extend(m3u8_formats)
- continue
- rtmp_format = {
- 'url': video_url,
- 'ext': 'flv' if format_id == 'rtmp' else 'mp4',
- }
- add_width_and_height(rtmp_format)
- formats.append(rtmp_format)
- self._sort_formats(formats)
-
- return {
- 'id': broadcast.get('id') or token,
- 'title': title,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
-
-
-class PeriscopeUserIE(PeriscopeBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
- IE_DESC = 'Periscope user videos'
- IE_NAME = 'periscope:user'
-
- _TEST = {
- 'url': 'https://www.periscope.tv/LularoeHusbandMike/',
- 'info_dict': {
- 'id': 'LularoeHusbandMike',
- 'title': 'LULAROE HUSBAND MIKE',
- 'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
- },
- # Periscope only shows videos in the last 24 hours, so it's possible to
- # get 0 videos
- 'playlist_mincount': 0,
- }
-
- def _real_extract(self, url):
- user_name = self._match_id(url)
-
- webpage = self._download_webpage(url, user_name)
-
- data_store = self._parse_json(
- unescapeHTML(self._search_regex(
- r'data-store=(["\'])(?P<data>.+?)\1',
- webpage, 'data store', default='{}', group='data')),
- user_name)
-
- user = list(data_store['UserCache']['users'].values())[0]['user']
- user_id = user['id']
- session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']
-
- broadcasts = self._call_api(
- 'getUserBroadcastsPublic',
- {'user_id': user_id, 'session_id': session_id},
- user_name)['broadcasts']
-
- broadcast_ids = [
- broadcast['id'] for broadcast in broadcasts if broadcast.get('id')]
-
- title = user.get('display_name') or user.get('username') or user_name
- description = user.get('description')
-
- entries = [
- self.url_result(
- 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id))
- for broadcast_id in broadcast_ids]
-
- return self.playlist_result(entries, user_id, title, description)
diff --git a/youtube_dl/extractor/phoenix.py b/youtube_dl/extractor/phoenix.py
deleted file mode 100644
index e435c28e1..000000000
--- a/youtube_dl/extractor/phoenix.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from __future__ import unicode_literals
-
-from .dreisat import DreiSatIE
-
-
-class PhoenixIE(DreiSatIE):
- IE_NAME = 'phoenix.de'
- _VALID_URL = r'''(?x)https?://(?:www\.)?phoenix\.de/content/
- (?:
- phoenix/die_sendungen/(?:[^/]+/)?
- )?
- (?P<id>[0-9]+)'''
- _TESTS = [
- {
- 'url': 'http://www.phoenix.de/content/884301',
- 'md5': 'ed249f045256150c92e72dbb70eadec6',
- 'info_dict': {
- 'id': '884301',
- 'ext': 'mp4',
- 'title': 'Michael Krons mit Hans-Werner Sinn',
- 'description': 'Im Dialog - Sa. 25.10.14, 00.00 - 00.35 Uhr',
- 'upload_date': '20141025',
- 'uploader': 'Im Dialog',
- }
- },
- {
- 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/869815',
- 'only_matching': True,
- },
- {
- 'url': 'http://www.phoenix.de/content/phoenix/die_sendungen/diskussionen/928234',
- 'only_matching': True,
- },
- ]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- internal_id = self._search_regex(
- r'<div class="phx_vod" id="phx_vod_([0-9]+)"',
- webpage, 'internal video ID')
-
- api_url = 'http://www.phoenix.de/php/mediaplayer/data/beitrags_details.php?ak=web&id=%s' % internal_id
- return self.extract_from_xml_url(video_id, api_url)
diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py
deleted file mode 100644
index 602207beb..000000000
--- a/youtube_dl/extractor/platzi.py
+++ /dev/null
@@ -1,224 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_str,
-)
-from ..utils import (
- clean_html,
- ExtractorError,
- int_or_none,
- str_or_none,
- try_get,
- url_or_none,
- urlencode_postdata,
- urljoin,
-)
-
-
-class PlatziBaseIE(InfoExtractor):
- _LOGIN_URL = 'https://platzi.com/login/'
- _NETRC_MACHINE = 'platzi'
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login page')
-
- login_form = self._hidden_inputs(login_page)
-
- login_form.update({
- 'email': username,
- 'password': password,
- })
-
- urlh = self._request_webpage(
- self._LOGIN_URL, None, 'Logging in',
- data=urlencode_postdata(login_form),
- headers={'Referer': self._LOGIN_URL})
-
- # If we are no longer on the login page, login succeeded
- if 'platzi.com/login' not in compat_str(urlh.geturl()):
- return
-
- login_error = self._webpage_read_content(
- urlh, self._LOGIN_URL, None, 'Downloading login error page')
-
- login = self._parse_json(
- self._search_regex(
- r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'),
- None)
-
- for kind in ('error', 'password', 'nonFields'):
- error = str_or_none(login.get('%sError' % kind))
- if error:
- raise ExtractorError(
- 'Unable to login: %s' % error, expected=True)
- raise ExtractorError('Unable to log in')
-
-
-class PlatziIE(PlatziBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:
- platzi\.com/clases| # es version
- courses\.platzi\.com/classes # en version
- )/[^/]+/(?P<id>\d+)-[^/?\#&]+
- '''
-
- _TESTS = [{
- 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
- 'md5': '8f56448241005b561c10f11a595b37e3',
- 'info_dict': {
- 'id': '12074',
- 'ext': 'mp4',
- 'title': 'Creando nuestra primera página',
- 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
- 'duration': 420,
- },
- 'skip': 'Requires platzi account credentials',
- }, {
- 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
- 'info_dict': {
- 'id': '13430',
- 'ext': 'mp4',
- 'title': 'Background',
- 'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
- 'duration': 360,
- },
- 'skip': 'Requires platzi account credentials',
- 'params': {
- 'skip_download': True,
- },
- }]
-
- def _real_extract(self, url):
- lecture_id = self._match_id(url)
-
- webpage = self._download_webpage(url, lecture_id)
-
- data = self._parse_json(
- self._search_regex(
- # client_data may contain "};" so that we have to try more
- # strict regex first
- (r'client_data\s*=\s*({.+?})\s*;\s*\n',
- r'client_data\s*=\s*({.+?})\s*;'),
- webpage, 'client data'),
- lecture_id)
-
- material = data['initialState']['material']
- desc = material['description']
- title = desc['title']
-
- formats = []
- for server_id, server in material['videos'].items():
- if not isinstance(server, dict):
- continue
- for format_id in ('hls', 'dash'):
- format_url = url_or_none(server.get(format_id))
- if not format_url:
- continue
- if format_id == 'hls':
- formats.extend(self._extract_m3u8_formats(
- format_url, lecture_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id=format_id,
- note='Downloading %s m3u8 information' % server_id,
- fatal=False))
- elif format_id == 'dash':
- formats.extend(self._extract_mpd_formats(
- format_url, lecture_id, mpd_id=format_id,
- note='Downloading %s MPD manifest' % server_id,
- fatal=False))
- self._sort_formats(formats)
-
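- # The description is delivered base64-encoded HTML; decode it and strip
- # the markup.  The duration is reported in minutes, so invscale=60
- # converts it to seconds.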
- content = str_or_none(desc.get('content'))
- description = (clean_html(compat_b64decode(content).decode('utf-8'))
- if content else None)
- duration = int_or_none(material.get('duration'), invscale=60)
-
- return {
- 'id': lecture_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'formats': formats,
- }
-
-
-class PlatziCourseIE(PlatziBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:
- platzi\.com/clases| # es version
- courses\.platzi\.com/classes # en version
- )/(?P<id>[^/?\#&]+)
- '''
- _TESTS = [{
- 'url': 'https://platzi.com/clases/next-js/',
- 'info_dict': {
- 'id': '1311',
- 'title': 'Curso de Next.js',
- },
- 'playlist_count': 22,
- }, {
- 'url': 'https://courses.platzi.com/classes/communication-codestream/',
- 'info_dict': {
- 'id': '1367',
- 'title': 'Codestream Course',
- },
- 'playlist_count': 14,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url)
-
- def _real_extract(self, url):
- course_name = self._match_id(url)
-
- webpage = self._download_webpage(url, course_name)
-
- props = self._parse_json(
- self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
- course_name)['initialProps']
-
- entries = []
- for chapter_num, chapter in enumerate(props['concepts'], 1):
- if not isinstance(chapter, dict):
- continue
- materials = chapter.get('materials')
- if not materials or not isinstance(materials, list):
- continue
- chapter_title = chapter.get('title')
- chapter_id = str_or_none(chapter.get('id'))
- for material in materials:
- if not isinstance(material, dict):
- continue
- if material.get('material_type') != 'video':
- continue
- video_url = urljoin(url, material.get('url'))
- if not video_url:
- continue
- entries.append({
- '_type': 'url_transparent',
- 'url': video_url,
- 'title': str_or_none(material.get('name')),
- 'id': str_or_none(material.get('id')),
- 'ie_key': PlatziIE.ie_key(),
- 'chapter': chapter_title,
- 'chapter_number': chapter_num,
- 'chapter_id': chapter_id,
- })
-
- course_id = str_or_none(try_get(props, lambda x: x['course']['id']))
- course_title = try_get(props, lambda x: x['course']['name'], compat_str)
-
- return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/pokemon.py b/youtube_dl/extractor/pokemon.py
deleted file mode 100644
index dd5f17f11..000000000
--- a/youtube_dl/extractor/pokemon.py
+++ /dev/null
@@ -1,75 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- extract_attributes,
- int_or_none,
-)
-
-
-class PokemonIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
- _TESTS = [{
- 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
- 'md5': '2fe8eaec69768b25ef898cda9c43062e',
- 'info_dict': {
- 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
- 'ext': 'mp4',
- 'title': 'The Ol’ Raise and Switch!',
- 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
- 'timestamp': 1511824728,
- 'upload_date': '20171127',
- },
- 'add_ie': ['LimelightMedia'],
- }, {
- # no data-video-title
- 'url': 'https://www.pokemon.com/us/pokemon-episodes/pokemon-movies/pokemon-the-rise-of-darkrai-2008',
- 'info_dict': {
- 'id': '99f3bae270bf4e5097274817239ce9c8',
- 'ext': 'mp4',
- 'title': 'Pokémon: The Rise of Darkrai',
- 'description': 'md5:ea8fbbf942e1e497d54b19025dd57d9d',
- 'timestamp': 1417778347,
- 'upload_date': '20141205',
- },
- 'add_ie': ['LimelightMedia'],
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
- 'only_matching': True,
- }, {
- 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
- 'only_matching': True,
- }, {
- 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id, display_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(url, video_id or display_id)
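- # If the URL carries no explicit 32-character id, fall back to matching
- # any data-video-id attribute on the page.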
- video_data = extract_attributes(self._search_regex(
- r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
- webpage, 'video data element'))
- video_id = video_data['data-video-id']
- title = video_data.get('data-video-title') or self._html_search_meta(
- 'pkm-title', webpage, 'title', default=None) or self._search_regex(
- r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'url': 'limelight:media:%s' % video_id,
- 'title': title,
- 'description': video_data.get('data-video-summary'),
- 'thumbnail': video_data.get('data-video-poster'),
- 'series': 'Pokémon',
- 'season_number': int_or_none(video_data.get('data-video-season')),
- 'episode': title,
- 'episode_number': int_or_none(video_data.get('data-video-episode')),
- 'ie_key': 'LimelightMedia',
- }
diff --git a/youtube_dl/extractor/pornhd.py b/youtube_dl/extractor/pornhd.py
deleted file mode 100644
index 27d65d4b9..000000000
--- a/youtube_dl/extractor/pornhd.py
+++ /dev/null
@@ -1,109 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- js_to_json,
- urljoin,
-)
-
-
-class PornHdIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
- _TESTS = [{
- 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
- 'md5': '87f1540746c1d32ec7a2305c12b96b25',
- 'info_dict': {
- 'id': '9864',
- 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
- 'ext': 'mp4',
- 'title': 'Restroom selfie masturbation',
- 'description': 'md5:3748420395e03e31ac96857a8f125b2b',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'view_count': int,
- 'like_count': int,
- 'age_limit': 18,
- }
- }, {
- # removed video
- 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
- 'md5': '956b8ca569f7f4d8ec563e2c41598441',
- 'info_dict': {
- 'id': '1962',
- 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
- 'ext': 'mp4',
- 'title': 'Sierra loves doing laundry',
- 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'view_count': int,
- 'like_count': int,
- 'age_limit': 18,
- },
- 'skip': 'Not available anymore',
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- webpage = self._download_webpage(url, display_id or video_id)
-
- title = self._html_search_regex(
- [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
- r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
-
- sources = self._parse_json(js_to_json(self._search_regex(
- r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
- webpage, 'sources', default='{}')), video_id)
-
- if not sources:
- message = self._html_search_regex(
- r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
- webpage, 'error message', group='value')
- raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
-
- formats = []
- for format_id, video_url in sources.items():
- video_url = urljoin(url, video_url)
- if not video_url:
- continue
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]', format_id, 'height', default=None))
- formats.append({
- 'url': video_url,
- 'ext': determine_ext(video_url, 'mp4'),
- 'format_id': format_id,
- 'height': height,
- })
- self._sort_formats(formats)
-
- description = self._html_search_regex(
- r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1',
- webpage, 'description', fatal=False, group='value')
- view_count = int_or_none(self._html_search_regex(
- r'(\d+) views\s*<', webpage, 'view count', fatal=False))
- thumbnail = self._search_regex(
- r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
- 'thumbnail', fatal=False, group='url')
-
- like_count = int_or_none(self._search_regex(
- (r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes',
- r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
- webpage, 'like count', fatal=False))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'view_count': view_count,
- 'like_count': like_count,
- 'formats': formats,
- 'age_limit': 18,
- }
diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py
deleted file mode 100644
index ba0ad7da2..000000000
--- a/youtube_dl/extractor/pornhub.py
+++ /dev/null
@@ -1,569 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import functools
-import itertools
-import operator
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urllib_request,
-)
-from .openload import PhantomJSwrapper
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- orderedSet,
- remove_quotes,
- str_to_int,
- url_or_none,
-)
-
-
-class PornHubBaseIE(InfoExtractor):
- def _download_webpage_handle(self, *args, **kwargs):
- def dl(*args, **kwargs):
- return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
-
- webpage, urlh = dl(*args, **kwargs)
-
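- # These markers identify PornHub's JavaScript anti-bot challenge page;
- # when present, render the page with PhantomJS (which passes the
- # challenge and sets the required cookies) and retry the download.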
- if any(re.search(p, webpage) for p in (
- r'<body\b[^>]+\bonload=["\']go\(\)',
- r'document\.cookie\s*=\s*["\']RNKEY=',
- r'document\.location\.reload\(true\)')):
- url_or_request = args[0]
- url = (url_or_request.get_full_url()
- if isinstance(url_or_request, compat_urllib_request.Request)
- else url_or_request)
- phantom = PhantomJSwrapper(self, required_version='2.0')
- phantom.get(url, html=webpage)
- webpage, urlh = dl(*args, **kwargs)
-
- return webpage, urlh
-
-
-class PornHubIE(PornHubBaseIE):
- IE_DESC = 'PornHub and Thumbzilla'
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
- (?:www\.)?thumbzilla\.com/video/
- )
- (?P<id>[\da-z]+)
- '''
- _TESTS = [{
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
- 'md5': '1e19b41231a02eba417839222ac9d58e',
- 'info_dict': {
- 'id': '648719015',
- 'ext': 'mp4',
- 'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
- 'uploader': 'Babes',
- 'upload_date': '20130628',
- 'duration': 361,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- 'tags': list,
- 'categories': list,
- },
- }, {
- # non-ASCII title
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
- 'info_dict': {
- 'id': '1331683002',
- 'ext': 'mp4',
- 'title': '重庆婷婷女王足交',
- 'uploader': 'Unknown',
- 'upload_date': '20150213',
- 'duration': 1753,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- 'tags': list,
- 'categories': list,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # subtitles
- 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
- 'info_dict': {
- 'id': 'ph5af5fef7c2aa7',
- 'ext': 'mp4',
- 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
- 'uploader': 'BFFs',
- 'duration': 622,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- 'tags': list,
- 'categories': list,
- 'subtitles': {
- 'en': [{
- "ext": 'srt'
- }]
- },
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
- 'only_matching': True,
- }, {
- # removed at the request of cam4.com
- 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
- 'only_matching': True,
- }, {
- # removed at the request of the copyright owner
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
- 'only_matching': True,
- }, {
- # removed by uploader
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
- 'only_matching': True,
- }, {
- # private video
- 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
- 'only_matching': True,
- }, {
- 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
- 'only_matching': True,
- }, {
- 'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
- webpage)
-
- def _extract_count(self, pattern, webpage, name):
- return str_to_int(self._search_regex(
- pattern, webpage, '%s count' % name, fatal=False))
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host') or 'pornhub.com'
- video_id = mobj.group('id')
-
- self._set_cookie(host, 'age_verified', '1')
-
- def dl_webpage(platform):
- self._set_cookie(host, 'platform', platform)
- return self._download_webpage(
- 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
- video_id, 'Downloading %s webpage' % platform)
-
- webpage = dl_webpage('pc')
-
- error_msg = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
- webpage, 'error message', default=None, group='error')
- if error_msg:
- error_msg = re.sub(r'\s+', ' ', error_msg)
- raise ExtractorError(
- 'PornHub said: %s' % error_msg,
- expected=True, video_id=video_id)
-
- # video_title from flashvars replaces non-ASCII characters with
- # whitespace (see http://www.pornhub.com/view_video.php?viewkey=1331683002),
- # so it is not relied upon anymore.
- title = self._html_search_meta(
- 'twitter:title', webpage, default=None) or self._search_regex(
- (r'<h1[^>]+class=["\']title["\'][^>]*>(?P<title>[^<]+)',
- r'<div[^>]+data-video-title=(["\'])(?P<title>.+?)\1',
- r'shareTitle\s*=\s*(["\'])(?P<title>.+?)\1'),
- webpage, 'title', group='title')
-
- video_urls = []
- video_urls_set = set()
- subtitles = {}
-
- flashvars = self._parse_json(
- self._search_regex(
- r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
- video_id)
- if flashvars:
- subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
- if subtitle_url:
- subtitles.setdefault('en', []).append({
- 'url': subtitle_url,
- 'ext': 'srt',
- })
- thumbnail = flashvars.get('image_url')
- duration = int_or_none(flashvars.get('video_duration'))
- media_definitions = flashvars.get('mediaDefinitions')
- if isinstance(media_definitions, list):
- for definition in media_definitions:
- if not isinstance(definition, dict):
- continue
- video_url = definition.get('videoUrl')
- if not video_url or not isinstance(video_url, compat_str):
- continue
- if video_url in video_urls_set:
- continue
- video_urls_set.add(video_url)
- video_urls.append(
- (video_url, int_or_none(definition.get('quality'))))
- else:
- thumbnail, duration = [None] * 2
-
- if not video_urls:
- tv_webpage = dl_webpage('tv')
-
- assignments = self._search_regex(
- r'(var.+?mediastring.+?)</script>', tv_webpage,
- 'encoded url').split(';')
-
- js_vars = {}
-
- def parse_js_value(inp):
- inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
- if '+' in inp:
- inps = inp.split('+')
- return functools.reduce(
- operator.concat, map(parse_js_value, inps))
- inp = inp.strip()
- if inp in js_vars:
- return js_vars[inp]
- return remove_quotes(inp)
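- # A hypothetical illustration: given the assignments
- #   var qa = "https://cdn/"; var qb = qa + "video56.mp4";
- # parse_js_value('qa + "video56.mp4"') returns
- # 'https://cdn/video56.mp4' once js_vars['qa'] has been recorded.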
-
- for assn in assignments:
- assn = assn.strip()
- if not assn:
- continue
- assn = re.sub(r'var\s+', '', assn)
- vname, value = assn.split('=', 1)
- js_vars[vname] = parse_js_value(value)
-
- video_url = js_vars['mediastring']
- if video_url not in video_urls_set:
- video_urls.append((video_url, None))
- video_urls_set.add(video_url)
-
- for mobj in re.finditer(
- r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage):
- video_url = mobj.group('url')
- if video_url not in video_urls_set:
- video_urls.append((video_url, None))
- video_urls_set.add(video_url)
-
- upload_date = None
- formats = []
- for video_url, height in video_urls:
- if not upload_date:
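- # The CDN path embeds the upload date,
- # e.g. .../videos/201306/28/... -> 20130628.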
- upload_date = self._search_regex(
- r'/(\d{6}/\d{2})/', video_url, 'upload date', default=None)
- if upload_date:
- upload_date = upload_date.replace('/', '')
- if determine_ext(video_url) == 'mpd':
- formats.extend(self._extract_mpd_formats(
- video_url, video_id, mpd_id='dash', fatal=False))
- continue
- tbr = None
- mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
- if mobj:
- if not height:
- height = int(mobj.group('height'))
- tbr = int(mobj.group('tbr'))
- formats.append({
- 'url': video_url,
- 'format_id': '%dp' % height if height else None,
- 'height': height,
- 'tbr': tbr,
- })
- self._sort_formats(formats)
-
- video_uploader = self._html_search_regex(
- r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
- webpage, 'uploader', fatal=False)
-
- view_count = self._extract_count(
- r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
- like_count = self._extract_count(
- r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
- dislike_count = self._extract_count(
- r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
- comment_count = self._extract_count(
- r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
-
- def extract_list(meta_key):
- div = self._search_regex(
- r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
- % meta_key, webpage, meta_key, default=None)
- if div:
- return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
-
- return {
- 'id': video_id,
- 'uploader': video_uploader,
- 'upload_date': upload_date,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'comment_count': comment_count,
- 'formats': formats,
- 'age_limit': 18,
- 'tags': extract_list('tags'),
- 'categories': extract_list('categories'),
- 'subtitles': subtitles,
- }
-
-
-class PornHubPlaylistBaseIE(PornHubBaseIE):
- def _extract_entries(self, webpage, host):
- # Only process the container div holding the main playlist content,
- # skipping the drop-down menu that uses a similar markup pattern for
- # videos (see https://github.com/ytdl-org/youtube-dl/issues/11594).
- container = self._search_regex(
- r'(?s)(<div[^>]+class=["\']container.+)', webpage,
- 'container', default=webpage)
-
- return [
- self.url_result(
- 'http://www.%s/%s' % (host, video_url),
- PornHubIE.ie_key(), video_title=title)
- for video_url, title in orderedSet(re.findall(
- r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
- container))
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host')
- playlist_id = mobj.group('id')
-
- webpage = self._download_webpage(url, playlist_id)
-
- entries = self._extract_entries(webpage, host)
-
- playlist = self._parse_json(
- self._search_regex(
- r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
- 'playlist', default='{}'),
- playlist_id, fatal=False)
- title = playlist.get('title') or self._search_regex(
- r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
-
- return self.playlist_result(
- entries, playlist_id, title, playlist.get('description'))
-
-
-class PornHubUserIE(PornHubPlaylistBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?pornhub\.(?:com|net)/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
- _TESTS = [{
- 'url': 'https://www.pornhub.com/model/zoe_ph',
- 'playlist_mincount': 118,
- }, {
- 'url': 'https://www.pornhub.com/pornstar/liz-vicious',
- 'info_dict': {
- 'id': 'liz-vicious',
- },
- 'playlist_mincount': 118,
- }, {
- 'url': 'https://www.pornhub.com/users/russianveet69',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/channels/povd',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- user_id = mobj.group('id')
- return self.url_result(
- '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
- video_id=user_id)
-
-
-class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
- @staticmethod
- def _has_more(webpage):
- return re.search(
- r'''(?x)
- <li[^>]+\bclass=["\']page_next|
- <link[^>]+\brel=["\']next|
- <button[^>]+\bid=["\']moreDataBtn
- ''', webpage) is not None
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host')
- item_id = mobj.group('id')
-
- page = int_or_none(self._search_regex(
- r'\bpage=(\d+)', url, 'page', default=None))
-
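- # An explicit ?page=N in the URL restricts extraction to that single
- # page; otherwise pages are fetched sequentially until a 404, an empty
- # page or a missing "next" marker is encountered.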
- entries = []
- for page_num in (page, ) if page is not None else itertools.count(1):
- try:
- webpage = self._download_webpage(
- url, item_id, 'Downloading page %d' % page_num,
- query={'page': page_num})
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
- break
- raise
- page_entries = self._extract_entries(webpage, host)
- if not page_entries:
- break
- entries.extend(page_entries)
- if not self._has_more(webpage):
- break
-
- return self.playlist_result(orderedSet(entries), item_id)
-
-
-class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
- _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
- 'only_matching': True,
- }, {
- 'url': 'http://www.pornhub.com/users/rushandlia/videos',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
- 'info_dict': {
- 'id': 'pornstar/jenny-blighe/videos',
- },
- 'playlist_mincount': 149,
- }, {
- 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
- 'info_dict': {
- 'id': 'pornstar/jenny-blighe/videos',
- },
- 'playlist_mincount': 40,
- }, {
- # default sorting as Top Rated Videos
- 'url': 'https://www.pornhub.com/channels/povd/videos',
- 'info_dict': {
- 'id': 'channels/povd/videos',
- },
- 'playlist_mincount': 293,
- }, {
- # Top Rated Videos
- 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
- 'only_matching': True,
- }, {
- # Most Recent Videos
- 'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
- 'only_matching': True,
- }, {
- # Most Viewed Videos
- 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
- 'only_matching': True,
- }, {
- 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
- 'only_matching': True,
- }, {
- # Most Viewed Videos
- 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
- 'only_matching': True,
- }, {
- # Top Rated Videos
- 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
- 'only_matching': True,
- }, {
- # Longest Videos
- 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
- 'only_matching': True,
- }, {
- # Newest Videos
- 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/video',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/video?page=3',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/video/search?search=123',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/categories/teen',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/categories/teen?page=3',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/hd',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/hd?page=3',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/described-video',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/described-video?page=2',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
- 'only_matching': True,
- }, {
- 'url': 'https://www.pornhub.com/playlist/44121572',
- 'info_dict': {
- 'id': 'playlist/44121572',
- },
- 'playlist_mincount': 132,
- }, {
- 'url': 'https://www.pornhub.com/playlist/4667351',
- 'only_matching': True,
- }, {
- 'url': 'https://de.pornhub.com/playlist/4667351',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (False
- if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
- else super(PornHubPagedVideoListIE, cls).suitable(url))
-
-
-class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
- _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
- _TESTS = [{
- 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
- 'info_dict': {
- 'id': 'jenny-blighe',
- },
- 'playlist_mincount': 129,
- }, {
- 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
- 'only_matching': True,
- }]
diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py
deleted file mode 100644
index e19a470a5..000000000
--- a/youtube_dl/extractor/prosiebensat1.py
+++ /dev/null
@@ -1,500 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from hashlib import sha1
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- determine_ext,
- float_or_none,
- int_or_none,
- unified_strdate,
-)
-
-
-class ProSiebenSat1BaseIE(InfoExtractor):
- _GEO_COUNTRIES = ['DE']
- _ACCESS_ID = None
- _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
- _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
-
- def _extract_video_info(self, url, clip_id):
- client_location = url
-
- video = self._download_json(
- 'http://vas.sim-technik.de/vas/live/v2/videos',
- clip_id, 'Downloading videos JSON', query={
- 'access_token': self._TOKEN,
- 'client_location': client_location,
- 'client_name': self._CLIENT_NAME,
- 'ids': clip_id,
- })[0]
-
- if video.get('is_protected') is True:
- raise ExtractorError('This video is DRM protected.', expected=True)
-
- formats = []
- if self._ACCESS_ID:
- raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
- server_token = (self._download_json(
- self._V4_BASE_URL + 'protocols', clip_id,
- 'Downloading protocols JSON',
- headers=self.geo_verification_headers(), query={
- 'access_id': self._ACCESS_ID,
- 'client_token': sha1((raw_ct).encode()).hexdigest(),
- 'video_id': clip_id,
- }, fatal=False) or {}).get('server_token')
- if server_token:
- urls = (self._download_json(
- self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
- 'access_id': self._ACCESS_ID,
- 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
- 'protocols': self._SUPPORTED_PROTOCOLS,
- 'server_token': server_token,
- 'video_id': clip_id,
- }, fatal=False) or {}).get('urls') or {}
- for protocol, variant in urls.items():
- source_url = variant.get('clear', {}).get('url')
- if not source_url:
- continue
- if protocol == 'dash':
- formats.extend(self._extract_mpd_formats(
- source_url, clip_id, mpd_id=protocol, fatal=False))
- elif protocol == 'hls':
- formats.extend(self._extract_m3u8_formats(
- source_url, clip_id, 'mp4', 'm3u8_native',
- m3u8_id=protocol, fatal=False))
- else:
- formats.append({
- 'url': source_url,
- 'format_id': protocol,
- })
- if not formats:
- source_ids = [compat_str(source['id']) for source in video['sources']]
-
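- # The v2 API expects a client_id built from the first two characters
- # of the salt followed by the SHA-1 hex digest of clip id, salt,
- # token, client location, salt and client name.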
- client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
-
- sources = self._download_json(
- 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
- clip_id, 'Downloading sources JSON', query={
- 'access_token': self._TOKEN,
- 'client_id': client_id,
- 'client_location': client_location,
- 'client_name': self._CLIENT_NAME,
- })
- server_id = sources['server_id']
-
- def fix_bitrate(bitrate):
- bitrate = int_or_none(bitrate)
- if not bitrate:
- return None
- return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
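- # e.g. a reported 2500000 (bps) becomes 2500 kbps, while a value
- # such as 847 that is not divisible by 1000 is assumed to already
- # be in kbps.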
-
- for source_id in source_ids:
- client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
- urls = self._download_json(
- 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
- clip_id, 'Downloading urls JSON', fatal=False, query={
- 'access_token': self._TOKEN,
- 'client_id': client_id,
- 'client_location': client_location,
- 'client_name': self._CLIENT_NAME,
- 'server_id': server_id,
- 'source_ids': source_id,
- })
- if not urls:
- continue
- if urls.get('status_code') != 0:
- raise ExtractorError('This video is unavailable', expected=True)
- urls_sources = urls['sources']
- if isinstance(urls_sources, dict):
- urls_sources = urls_sources.values()
- for source in urls_sources:
- source_url = source.get('url')
- if not source_url:
- continue
- protocol = source.get('protocol')
- mimetype = source.get('mimetype')
- if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
- formats.extend(self._extract_f4m_formats(
- source_url, clip_id, f4m_id='hds', fatal=False))
- elif mimetype == 'application/x-mpegURL':
- formats.extend(self._extract_m3u8_formats(
- source_url, clip_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- elif mimetype == 'application/dash+xml':
- formats.extend(self._extract_mpd_formats(
- source_url, clip_id, mpd_id='dash', fatal=False))
- else:
- tbr = fix_bitrate(source['bitrate'])
- if protocol in ('rtmp', 'rtmpe'):
- mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
- if not mobj:
- continue
- path = mobj.group('path')
- mp4colon_index = path.rfind('mp4:')
- app = path[:mp4colon_index]
- play_path = path[mp4colon_index:]
- formats.append({
- 'url': '%s/%s' % (mobj.group('url'), app),
- 'app': app,
- 'play_path': play_path,
- 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
- 'page_url': 'http://www.prosieben.de',
- 'tbr': tbr,
- 'ext': 'flv',
- 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
- })
- else:
- formats.append({
- 'url': source_url,
- 'tbr': tbr,
- 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
- })
- self._sort_formats(formats)
-
- return {
- 'duration': float_or_none(video.get('duration')),
- 'formats': formats,
- }
-
-
-class ProSiebenSat1IE(ProSiebenSat1BaseIE):
- IE_NAME = 'prosiebensat1'
- IE_DESC = 'ProSiebenSat.1 Digital'
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?
- (?:
- (?:beta\.)?
- (?:
- prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|7tv|advopedia
- )\.(?:de|at|ch)|
- ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
- )
- /(?P<id>.+)
- '''
-
- _TESTS = [
- {
- # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
- # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
- # - malformed f4m manifest support
- # - proper handling of URLs starting with `https?://` in 2.0 manifests
- # - recursive child f4m manifests extraction
- 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
- 'info_dict': {
- 'id': '2104602',
- 'ext': 'mp4',
- 'title': 'Episode 18 - Staffel 2',
- 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
- 'upload_date': '20131231',
- 'duration': 5845.04,
- },
- },
- {
- 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
- 'info_dict': {
- 'id': '2570327',
- 'ext': 'mp4',
- 'title': 'Lady-Umstyling für Audrina',
- 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
- 'upload_date': '20131014',
- 'duration': 606.76,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'Seems to be broken',
- },
- {
- 'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
- 'info_dict': {
- 'id': '2429369',
- 'ext': 'mp4',
- 'title': 'Countdown für die Autowerkstatt',
- 'description': 'md5:809fc051a457b5d8666013bc40698817',
- 'upload_date': '20140223',
- 'duration': 2595.04,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'This video is unavailable',
- },
- {
- 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
- 'info_dict': {
- 'id': '2904997',
- 'ext': 'mp4',
- 'title': 'Sexy laufen in Ugg Boots',
- 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
- 'upload_date': '20140122',
- 'duration': 245.32,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'This video is unavailable',
- },
- {
- 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
- 'info_dict': {
- 'id': '2906572',
- 'ext': 'mp4',
- 'title': 'Im Interview: Kai Wiesinger',
- 'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
- 'upload_date': '20140203',
- 'duration': 522.56,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'This video is unavailable',
- },
- {
- 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
- 'info_dict': {
- 'id': '2992323',
- 'ext': 'mp4',
- 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
- 'description': 'md5:2669cde3febe9bce13904f701e774eb6',
- 'upload_date': '20141014',
- 'duration': 2410.44,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'This video is unavailable',
- },
- {
- 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
- 'info_dict': {
- 'id': '3004256',
- 'ext': 'mp4',
- 'title': 'Schalke: Tönnies möchte Raul zurück',
- 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
- 'upload_date': '20140226',
- 'duration': 228.96,
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- 'skip': 'This video is unavailable',
- },
- {
- 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
- 'info_dict': {
- 'id': '2572814',
- 'ext': 'mp4',
- 'title': 'Andreas Kümmert: Rocket Man',
- 'description': 'md5:6ddb02b0781c6adf778afea606652e38',
- 'upload_date': '20131017',
- 'duration': 469.88,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.fem.com/wellness/videos/wellness-video-clip-kurztripps-zum-valentinstag.html',
- 'info_dict': {
- 'id': '2156342',
- 'ext': 'mp4',
- 'title': 'Kurztrips zum Valentinstag',
- 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
- 'duration': 307.24,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
- 'info_dict': {
- 'id': '439664',
- 'title': 'Episode 8 - Ganze Folge - Playlist',
- 'description': 'md5:63b8963e71f481782aeea877658dec84',
- },
- 'playlist_count': 2,
- 'skip': 'This video is unavailable',
- },
- {
- 'url': 'http://www.7tv.de/circus-halligalli/615-best-of-circus-halligalli-ganze-folge',
- 'info_dict': {
- 'id': '4187506',
- 'ext': 'mp4',
- 'title': 'Best of Circus HalliGalli',
- 'description': 'md5:8849752efd90b9772c9db6fdf87fb9e9',
- 'upload_date': '20151229',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # title in <h2 class="subtitle">
- 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
- 'info_dict': {
- 'id': '4895826',
- 'ext': 'mp4',
- 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
- 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
- 'upload_date': '20170302',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'geo restricted to Germany',
- },
- {
- # geo restricted to Germany
- 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
- 'only_matching': True,
- },
- {
- # geo restricted to Germany
- 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
- 'only_matching': True,
- },
- {
- # geo restricted to Germany
- 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
- 'only_matching': True,
- },
- {
- 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
- 'only_matching': True,
- },
- {
- 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
- 'only_matching': True,
- },
- ]
-
- _TOKEN = 'prosieben'
- _SALT = '01!8d8F_)r9]4s[qeuXfP%'
- _CLIENT_NAME = 'kolibri-2.0.19-splec4'
-
- _ACCESS_ID = 'x_prosiebenmaxx-de'
- _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
- _IV = 'Aeluchoc6aevechuipiexeeboowedaok'
-
- _CLIPID_REGEXES = [
- r'"clip_id"\s*:\s+"(\d+)"',
- r'clipid: "(\d+)"',
- r'clip[iI]d=(\d+)',
- r'clip[iI][dD]\s*=\s*["\'](\d+)',
- r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
- r'proMamsId&quot;\s*:\s*&quot;(\d+)',
- r'proMamsId"\s*:\s*"(\d+)',
- ]
- _TITLE_REGEXES = [
- r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
- r'<header class="clearfix">\s*<h3>(.+?)</h3>',
- r'<!-- start video -->\s*<h1>(.+?)</h1>',
- r'<h1 class="att-name">\s*(.+?)</h1>',
- r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
- r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
- r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
- r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
- ]
- _DESCRIPTION_REGEXES = [
- r'<p itemprop="description">\s*(.+?)</p>',
- r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
- r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
- r'<p class="att-description">\s*(.+?)\s*</p>',
- r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
- r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
- ]
- _UPLOAD_DATE_REGEXES = [
- r'<meta property="og:published_time" content="(.+?)">',
- r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
- r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
- r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
- r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
- ]
- _PAGE_TYPE_REGEXES = [
- r'<meta name="page_type" content="([^"]+)">',
- r"'itemType'\s*:\s*'([^']*)'",
- ]
- _PLAYLIST_ID_REGEXES = [
- r'content[iI]d=(\d+)',
- r"'itemId'\s*:\s*'([^']*)'",
- ]
- _PLAYLIST_CLIP_REGEXES = [
- r'(?s)data-qvt=.+?<a href="([^"]+)"',
- ]
-
- def _extract_clip(self, url, webpage):
- clip_id = self._html_search_regex(
- self._CLIPID_REGEXES, webpage, 'clip id')
- title = self._html_search_regex(
- self._TITLE_REGEXES, webpage, 'title',
- default=None) or self._og_search_title(webpage)
- info = self._extract_video_info(url, clip_id)
- description = self._html_search_regex(
- self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
- if description is None:
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- upload_date = unified_strdate(self._html_search_regex(
- self._UPLOAD_DATE_REGEXES, webpage, 'upload date', default=None))
-
- info.update({
- 'id': clip_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- })
- return info
-
- def _extract_playlist(self, url, webpage):
- playlist_id = self._html_search_regex(
- self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
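- # Playlist items are embedded in the page as the JavaScript array
- # contentResources; parse it as JSON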
- playlist = self._parse_json(
- self._search_regex(
- r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
- webpage, 'playlist'),
- playlist_id)
- entries = []
- for item in playlist:
- clip_id = item.get('id') or item.get('upc')
- if not clip_id:
- continue
- info = self._extract_video_info(url, clip_id)
- info.update({
- 'id': clip_id,
- 'title': item.get('title') or item.get('teaser', {}).get('headline'),
- 'description': item.get('teaser', {}).get('description'),
- 'thumbnail': item.get('poster'),
- 'duration': float_or_none(item.get('duration')),
- 'series': item.get('tvShowTitle'),
- 'uploader': item.get('broadcastPublisher'),
- })
- entries.append(info)
- return self.playlist_result(entries, playlist_id)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- page_type = self._search_regex(
- self._PAGE_TYPE_REGEXES, webpage,
- 'page type', default='clip').lower()
- if page_type == 'clip':
- return self._extract_clip(url, webpage)
- elif page_type == 'playlist':
- return self._extract_playlist(url, webpage)
- else:
- raise ExtractorError(
- 'Unsupported page type %s' % page_type, expected=True)
diff --git a/youtube_dl/extractor/puhutv.py b/youtube_dl/extractor/puhutv.py
deleted file mode 100644
index 5465e8ab7..000000000
--- a/youtube_dl/extractor/puhutv.py
+++ /dev/null
@@ -1,247 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
-)
-from ..utils import (
- ExtractorError,
- int_or_none,
- float_or_none,
- parse_resolution,
- str_or_none,
- try_get,
- unified_timestamp,
- url_or_none,
- urljoin,
-)
-
-
-class PuhuTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
- IE_NAME = 'puhutv'
- _TESTS = [{
- # film
- 'url': 'https://puhutv.com/sut-kardesler-izle',
- 'md5': 'fbd8f2d8e7681f8bcd51b592475a6ae7',
- 'info_dict': {
- 'id': '5085',
- 'display_id': 'sut-kardesler',
- 'ext': 'mp4',
- 'title': 'Süt Kardeşler',
- 'description': 'md5:405fd024df916ca16731114eb18e511a',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 4832.44,
- 'creator': 'Arzu Film',
- 'timestamp': 1469778212,
- 'upload_date': '20160729',
- 'release_year': 1976,
- 'view_count': int,
- 'tags': ['Aile', 'Komedi', 'Klasikler'],
- },
- }, {
- # episode, geo restricted, bypassable with --geo-verification-proxy
- 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
- 'only_matching': True,
- }, {
- # 4k, with subtitles
- 'url': 'https://puhutv.com/dip-1-bolum-izle',
- 'only_matching': True,
- }]
- _SUBTITLE_LANGS = {
- 'English': 'en',
- 'Deutsch': 'de',
- 'عربى': 'ar'
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- info = self._download_json(
- urljoin(url, '/api/slug/%s-izle' % display_id),
- display_id)['data']
-
- video_id = compat_str(info['id'])
- title = info.get('name') or info['title']['name']
- if info.get('display_name'):
- title = '%s %s' % (title, info.get('display_name'))
-
- try:
- videos = self._download_json(
- 'https://puhutv.com/api/assets/%s/videos' % video_id,
- display_id, 'Downloading video JSON',
- headers=self.geo_verification_headers())
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- self.raise_geo_restricted()
- raise
-
- formats = []
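- # Each entry is either an HLS master playlist (expanded into one
- # format per variant) or a single rendition keyed by quality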
- for video in videos['data']['videos']:
- media_url = url_or_none(video.get('url'))
- if not media_url:
- continue
- playlist = video.get('is_playlist')
- if video.get('stream_type') == 'hls' and playlist is True:
- formats.extend(self._extract_m3u8_formats(
- media_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- continue
- quality = int_or_none(video.get('quality'))
- f = {
- 'url': media_url,
- 'ext': 'mp4',
- 'height': quality
- }
- video_format = video.get('video_format')
- if video_format == 'hls' and playlist is False:
- format_id = 'hls'
- f['protocol'] = 'm3u8_native'
- elif video_format == 'mp4':
- format_id = 'http'
- else:
- continue
- if quality:
- format_id += '-%sp' % quality
- f['format_id'] = format_id
- formats.append(f)
- self._sort_formats(formats)
-
- description = try_get(
- info, lambda x: x['title']['description'],
- compat_str) or info.get('description')
- timestamp = unified_timestamp(info.get('created_at'))
- creator = try_get(
- info, lambda x: x['title']['producer']['name'], compat_str)
-
- duration = float_or_none(
- try_get(info, lambda x: x['content']['duration_in_ms'], int),
- scale=1000)
- view_count = try_get(info, lambda x: x['content']['watch_count'], int)
-
- images = try_get(
- info, lambda x: x['content']['images']['wide'], dict) or {}
- thumbnails = []
- for image_id, image_url in images.items():
- if not isinstance(image_url, compat_str):
- continue
- if not image_url.startswith(('http', '//')):
- image_url = 'https://%s' % image_url
- t = parse_resolution(image_id)
- t.update({
- 'id': image_id,
- 'url': image_url
- })
- thumbnails.append(t)
-
- release_year = try_get(info, lambda x: x['title']['released_at'], int)
-
- season_number = int_or_none(info.get('season_number'))
- season_id = str_or_none(info.get('season_id'))
- episode_number = int_or_none(info.get('episode_number'))
-
- tags = []
- for genre in try_get(info, lambda x: x['title']['genres'], list) or []:
- if not isinstance(genre, dict):
- continue
- genre_name = genre.get('name')
- if genre_name and isinstance(genre_name, compat_str):
- tags.append(genre_name)
-
- subtitles = {}
- for subtitle in try_get(
- info, lambda x: x['content']['subtitles'], list) or []:
- if not isinstance(subtitle, dict):
- continue
- lang = subtitle.get('language')
- sub_url = url_or_none(subtitle.get('url'))
- if not lang or not isinstance(lang, compat_str) or not sub_url:
- continue
- subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
- 'url': sub_url
- }]
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'season_id': season_id,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'release_year': release_year,
- 'timestamp': timestamp,
- 'creator': creator,
- 'view_count': view_count,
- 'duration': duration,
- 'tags': tags,
- 'subtitles': subtitles,
- 'thumbnails': thumbnails,
- 'formats': formats
- }
-
-
-class PuhuTVSerieIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
- IE_NAME = 'puhutv:serie'
- _TESTS = [{
- 'url': 'https://puhutv.com/deniz-yildizi-detay',
- 'info_dict': {
- 'title': 'Deniz Yıldızı',
- 'id': 'deniz-yildizi',
- },
- 'playlist_mincount': 205,
- }, {
- # a film detail page which uses the same URL pattern as a series page
- 'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
- 'only_matching': True,
- }]
-
- def _extract_entries(self, seasons):
- for season in seasons:
- season_id = season.get('id')
- if not season_id:
- continue
- page = 1
- has_more = True
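- # Fetch each season's episodes page by page (40 per request) until
- # the API stops reporting hasMore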
- while has_more is True:
- season = self._download_json(
- 'https://galadriel.puhutv.com/seasons/%s' % season_id,
- season_id, 'Downloading page %s' % page, query={
- 'page': page,
- 'per': 40,
- })
- episodes = season.get('episodes')
- if isinstance(episodes, list):
- for ep in episodes:
- slug_path = str_or_none(ep.get('slugPath'))
- if not slug_path:
- continue
- video_id = str_or_none(int_or_none(ep.get('id')))
- yield self.url_result(
- 'https://puhutv.com/%s' % slug_path,
- ie=PuhuTVIE.ie_key(), video_id=video_id,
- video_title=ep.get('name') or ep.get('eventLabel'))
- page += 1
- has_more = season.get('hasMore')
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- info = self._download_json(
- urljoin(url, '/api/slug/%s-detay' % playlist_id),
- playlist_id)['data']
-
- seasons = info.get('seasons')
- if seasons:
- return self.playlist_result(
- self._extract_entries(seasons), playlist_id, info.get('name'))
-
- # Films use the same URL pattern as series
- video_id = info.get('slug') or info['assets'][0]['slug']
- return self.url_result(
- 'https://puhutv.com/%s-izle' % video_id,
- PuhuTVIE.ie_key(), video_id)
diff --git a/youtube_dl/extractor/redtube.py b/youtube_dl/extractor/redtube.py
deleted file mode 100644
index 5c84028ef..000000000
--- a/youtube_dl/extractor/redtube.py
+++ /dev/null
@@ -1,119 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- merge_dicts,
- str_to_int,
- unified_strdate,
- url_or_none,
-)
-
-
-class RedTubeIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.redtube.com/66418',
- 'md5': 'fc08071233725f26b8f014dba9590005',
- 'info_dict': {
- 'id': '66418',
- 'ext': 'mp4',
- 'title': 'Sucked on a toilet',
- 'upload_date': '20110811',
- 'duration': 596,
- 'view_count': int,
- 'age_limit': 18,
- }
- }, {
- 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
- webpage)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(
- 'http://www.redtube.com/%s' % video_id, video_id)
-
- if any(s in webpage for s in ['video-deleted-info', '>This video has been removed']):
- raise ExtractorError('Video %s has been removed' % video_id, expected=True)
-
- info = self._search_json_ld(webpage, video_id, default={})
-
- if not info.get('title'):
- info['title'] = self._html_search_regex(
- (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
- r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
- webpage, 'title', group='title',
- default=None) or self._og_search_title(webpage)
-
- formats = []
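- # Formats come from either a 'sources' map (height -> URL) or a
- # 'mediaDefinition' list; fall back to a bare <source> tag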
- sources = self._parse_json(
- self._search_regex(
- r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
- video_id, fatal=False)
- if sources and isinstance(sources, dict):
- for format_id, format_url in sources.items():
- if format_url:
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'height': int_or_none(format_id),
- })
- medias = self._parse_json(
- self._search_regex(
- r'mediaDefinition\s*:\s*(\[.+?\])', webpage,
- 'media definitions', default='{}'),
- video_id, fatal=False)
- if medias and isinstance(medias, list):
- for media in medias:
- format_url = url_or_none(media.get('videoUrl'))
- if not format_url:
- continue
- format_id = media.get('quality')
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'height': int_or_none(format_id),
- })
- if not formats:
- video_url = self._html_search_regex(
- r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
- formats.append({'url': video_url})
- self._sort_formats(formats)
-
- thumbnail = self._og_search_thumbnail(webpage)
- upload_date = unified_strdate(self._search_regex(
- r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
- webpage, 'upload date', default=None))
- duration = int_or_none(self._og_search_property(
- 'video:duration', webpage, default=None) or self._search_regex(
- r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
- view_count = str_to_int(self._search_regex(
- (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
- r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
- r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
- webpage, 'view count', default=None))
-
- # No self-labeling, but they describe themselves as
- # "Home of Videos Porno"
- age_limit = 18
-
- return merge_dicts(info, {
- 'id': video_id,
- 'ext': 'mp4',
- 'thumbnail': thumbnail,
- 'upload_date': upload_date,
- 'duration': duration,
- 'view_count': view_count,
- 'age_limit': age_limit,
- 'formats': formats,
- })
diff --git a/youtube_dl/extractor/revision3.py b/youtube_dl/extractor/revision3.py
deleted file mode 100644
index 833d8a2f0..000000000
--- a/youtube_dl/extractor/revision3.py
+++ /dev/null
@@ -1,170 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- parse_iso8601,
- unescapeHTML,
- qualities,
-)
-
-
-class Revision3EmbedIE(InfoExtractor):
- IE_NAME = 'revision3:embed'
- _VALID_URL = r'(?:revision3:(?:(?P<playlist_type>[^:]+):)?|https?://(?:(?:(?:www|embed)\.)?(?:revision3|animalist)|(?:(?:api|embed)\.)?seekernetwork)\.com/player/embed\?videoId=)(?P<playlist_id>\d+)'
- _TEST = {
- 'url': 'http://api.seekernetwork.com/player/embed?videoId=67558',
- 'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
- 'info_dict': {
- 'id': '67558',
- 'ext': 'mp4',
- 'title': 'The Pros & Cons Of Zoos',
- 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
- 'uploader_id': 'dnews',
- 'uploader': 'DNews',
- }
- }
- _API_KEY = 'ba9c741bce1b9d8e3defcc22193f3651b8867e62'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('playlist_id')
- playlist_type = mobj.group('playlist_type') or 'video_id'
- video_data = self._download_json(
- 'http://revision3.com/api/getPlaylist.json', playlist_id, query={
- 'api_key': self._API_KEY,
- 'codecs': 'h264,vp8,theora',
- playlist_type: playlist_id,
- })['items'][0]
-
- formats = []
- for vcodec, media in video_data['media'].items():
- for quality_id, quality in media.items():
- if quality_id == 'hls':
- formats.extend(self._extract_m3u8_formats(
- quality['url'], playlist_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'url': quality['url'],
- 'format_id': '%s-%s' % (vcodec, quality_id),
- 'tbr': int_or_none(quality.get('bitrate')),
- 'vcodec': vcodec,
- })
- self._sort_formats(formats)
-
- return {
- 'id': playlist_id,
- 'title': unescapeHTML(video_data['title']),
- 'description': unescapeHTML(video_data.get('summary')),
- 'uploader': video_data.get('show', {}).get('name'),
- 'uploader_id': video_data.get('show', {}).get('slug'),
- 'duration': int_or_none(video_data.get('duration')),
- 'formats': formats,
- }
-
-
-class Revision3IE(InfoExtractor):
- IE_NAME = 'revision'
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:revision3|animalist)\.com)/(?P<id>[^/]+(?:/[^/?#]+)?)'
- _TESTS = [{
- 'url': 'http://www.revision3.com/technobuffalo/5-google-predictions-for-2016',
- 'md5': 'd94a72d85d0a829766de4deb8daaf7df',
- 'info_dict': {
- 'id': '71089',
- 'display_id': 'technobuffalo/5-google-predictions-for-2016',
- 'ext': 'webm',
- 'title': '5 Google Predictions for 2016',
- 'description': 'Google had a great 2015, but it\'s already time to look ahead. Here are our five predictions for 2016.',
- 'upload_date': '20151228',
- 'timestamp': 1451325600,
- 'duration': 187,
- 'uploader': 'TechnoBuffalo',
- 'uploader_id': 'technobuffalo',
- }
- }, {
- # Show
- 'url': 'http://revision3.com/variant',
- 'only_matching': True,
- }, {
- # Tag
- 'url': 'http://revision3.com/vr',
- 'only_matching': True,
- }]
- _PAGE_DATA_TEMPLATE = 'http://www.%s/apiProxy/ddn/%s?domain=%s'
-
- def _real_extract(self, url):
- domain, display_id = re.match(self._VALID_URL, url).groups()
- site = domain.split('.')[0]
- page_info = self._download_json(
- self._PAGE_DATA_TEMPLATE % (domain, display_id, domain), display_id)
-
- page_data = page_info['data']
- page_type = page_data['type']
- if page_type in ('episode', 'embed'):
- show_data = page_data['show']['data']
- page_id = compat_str(page_data['id'])
- video_id = compat_str(page_data['video']['data']['id'])
-
- preference = qualities(['mini', 'small', 'medium', 'large'])
- thumbnails = [{
- 'url': image_url,
- 'id': image_id,
- 'preference': preference(image_id)
- } for image_id, image_url in page_data.get('images', {}).items()]
-
- info = {
- 'id': page_id,
- 'display_id': display_id,
- 'title': unescapeHTML(page_data['name']),
- 'description': unescapeHTML(page_data.get('summary')),
- 'timestamp': parse_iso8601(page_data.get('publishTime'), ' '),
- 'author': page_data.get('author'),
- 'uploader': show_data.get('name'),
- 'uploader_id': show_data.get('slug'),
- 'thumbnails': thumbnails,
- 'extractor_key': site,
- }
-
- if page_type == 'embed':
- info.update({
- '_type': 'url_transparent',
- 'url': page_data['video']['data']['embed'],
- })
- return info
-
- info.update({
- '_type': 'url_transparent',
- 'url': 'revision3:%s' % video_id,
- })
- return info
- else:
- list_data = page_info[page_type]['data']
- episodes_data = page_info['episodes']['data']
- num_episodes = page_info['meta']['totalEpisodes']
- processed_episodes = 0
- entries = []
- page_num = 1
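- # Episodes are paginated: keep fetching <display_id>/<page_num>
- # batches until all totalEpisodes entries have been collected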
- while True:
- entries.extend([{
- '_type': 'url',
- 'url': 'http://%s%s' % (domain, episode['path']),
- 'id': compat_str(episode['id']),
- 'ie_key': 'Revision3',
- 'extractor_key': site,
- } for episode in episodes_data])
- processed_episodes += len(episodes_data)
- if processed_episodes == num_episodes:
- break
- page_num += 1
- episodes_data = self._download_json(self._PAGE_DATA_TEMPLATE % (
- domain, display_id + '/' + compat_str(page_num), domain),
- display_id)['episodes']['data']
-
- return self.playlist_result(
- entries, compat_str(list_data['id']),
- list_data.get('name'), list_data.get('summary'))
diff --git a/youtube_dl/extractor/roosterteeth.py b/youtube_dl/extractor/roosterteeth.py
deleted file mode 100644
index 8d88ee499..000000000
--- a/youtube_dl/extractor/roosterteeth.py
+++ /dev/null
@@ -1,148 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
-)
-from ..utils import (
- ExtractorError,
- int_or_none,
- str_or_none,
- urlencode_postdata,
-)
-
-
-class RoosterTeethIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
- _LOGIN_URL = 'https://roosterteeth.com/login'
- _NETRC_MACHINE = 'roosterteeth'
- _TESTS = [{
- 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
- 'md5': 'e2bd7764732d785ef797700a2489f212',
- 'info_dict': {
- 'id': '9156',
- 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
- 'ext': 'mp4',
- 'title': 'Million Dollars, But... The Game Announcement',
- 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5',
- 'thumbnail': r're:^https?://.*\.png$',
- 'series': 'Million Dollars, But...',
- 'episode': 'Million Dollars, But... The Game Announcement',
- },
- }, {
- 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
- 'only_matching': True,
- }, {
- 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
- 'only_matching': True,
- }, {
- 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
- 'only_matching': True,
- }, {
- 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
- 'only_matching': True,
- }, {
- # only available for FIRST members
- 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
- 'only_matching': True,
- }, {
- 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
- 'only_matching': True,
- }]
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None,
- note='Downloading login page',
- errnote='Unable to download login page')
-
- login_form = self._hidden_inputs(login_page)
-
- login_form.update({
- 'username': username,
- 'password': password,
- })
-
- login_request = self._download_webpage(
- self._LOGIN_URL, None,
- note='Logging in',
- data=urlencode_postdata(login_form),
- headers={
- 'Referer': self._LOGIN_URL,
- })
-
- if not any(re.search(p, login_request) for p in (
- r'href=["\']https?://(?:www\.)?roosterteeth\.com/logout"',
- r'>Sign Out<')):
- error = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\']).*?\balert-danger\b.*?\1[^>]*>(?:\s*<button[^>]*>.*?</button>)?(?P<error>.+?)</div>',
- login_request, 'alert', default=None, group='error')
- if error:
- raise ExtractorError('Unable to log in: %s' % error, expected=True)
- raise ExtractorError('Unable to log in')
-
- def _real_initialize(self):
- self._login()
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- api_episode_url = 'https://svod-be.roosterteeth.com/api/v1/episodes/%s' % display_id
-
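- # FIRST-member-only episodes answer 403 with "access": false;
- # surface that as a login-required error instead of a generic one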
- try:
- m3u8_url = self._download_json(
- api_episode_url + '/videos', display_id,
- 'Downloading video JSON metadata')['data'][0]['attributes']['url']
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
- self.raise_login_required(
- '%s is only available for FIRST members' % display_id)
- raise
-
- formats = self._extract_m3u8_formats(
- m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
-
- episode = self._download_json(
- api_episode_url, display_id,
- 'Downloading episode JSON metadata')['data'][0]
- attributes = episode['attributes']
- title = attributes.get('title') or attributes['display_title']
- video_id = compat_str(episode['id'])
-
- thumbnails = []
- for image in episode.get('included', {}).get('images', []):
- if image.get('type') == 'episode_image':
- img_attributes = image.get('attributes') or {}
- for k in ('thumb', 'small', 'medium', 'large'):
- img_url = img_attributes.get(k)
- if img_url:
- thumbnails.append({
- 'id': k,
- 'url': img_url,
- })
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': attributes.get('description') or attributes.get('caption'),
- 'thumbnails': thumbnails,
- 'series': attributes.get('show_title'),
- 'season_number': int_or_none(attributes.get('season_number')),
- 'season_id': attributes.get('season_id'),
- 'episode': title,
- 'episode_number': int_or_none(attributes.get('number')),
- 'episode_id': str_or_none(episode.get('uuid')),
- 'formats': formats,
- 'channel_id': attributes.get('channel_id'),
- 'duration': int_or_none(attributes.get('length')),
- }
diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py
deleted file mode 100644
index bd9ee1647..000000000
--- a/youtube_dl/extractor/safari.py
+++ /dev/null
@@ -1,264 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import re
-
-from .common import InfoExtractor
-
-from ..compat import (
- compat_parse_qs,
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- ExtractorError,
- update_url_query,
-)
-
-
-class SafariBaseIE(InfoExtractor):
- _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
- _NETRC_MACHINE = 'safari'
-
- _API_BASE = 'https://learning.oreilly.com/api/v1'
- _API_FORMAT = 'json'
-
- LOGGED_IN = False
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
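- # Probe the login-check endpoint: an already authenticated session
- # is redirected to learning.oreilly.com/home/; otherwise we land on
- # the login form and must POST credentials to the member auth endpoint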
- _, urlh = self._download_webpage_handle(
- 'https://learning.oreilly.com/accounts/login-check/', None,
- 'Downloading login page')
-
- def is_logged(urlh):
- return 'learning.oreilly.com/home/' in compat_str(urlh.geturl())
-
- if is_logged(urlh):
- self.LOGGED_IN = True
- return
-
- redirect_url = compat_str(urlh.geturl())
- parsed_url = compat_urlparse.urlparse(redirect_url)
- qs = compat_parse_qs(parsed_url.query)
- next_uri = compat_urlparse.urljoin(
- 'https://api.oreilly.com', qs['next'][0])
-
- auth, urlh = self._download_json_handle(
- 'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
- data=json.dumps({
- 'email': username,
- 'password': password,
- 'redirect_uri': next_uri,
- }).encode(), headers={
- 'Content-Type': 'application/json',
- 'Referer': redirect_url,
- }, expected_status=400)
-
- credentials = auth.get('credentials')
- if (not auth.get('logged_in') and not auth.get('redirect_uri')
- and credentials):
- raise ExtractorError(
- 'Unable to log in: %s' % credentials, expected=True)
-
- # oreilly serves two identical instances of the following cookies
- # in the Set-Cookie header and expects the first one to actually be set
- for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
- self._apply_first_set_cookie_header(urlh, cookie)
-
- _, urlh = self._download_webpage_handle(
- auth.get('redirect_uri') or next_uri, None, 'Completing login')
-
- if is_logged(urlh):
- self.LOGGED_IN = True
- return
-
- raise ExtractorError('Unable to log in')
-
-
-class SafariIE(SafariBaseIE):
- IE_NAME = 'safari'
- IE_DESC = 'safaribooksonline.com online video'
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
- (?:
- library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
- videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
- )
- '''
-
- _TESTS = [{
- 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
- 'md5': 'dcc5a425e79f2564148652616af1f2a3',
- 'info_dict': {
- 'id': '0_qbqx90ic',
- 'ext': 'mp4',
- 'title': 'Introduction to Hadoop Fundamentals LiveLessons',
- 'timestamp': 1437758058,
- 'upload_date': '20150724',
- 'uploader_id': 'stork',
- },
- }, {
- # non-digits in course id
- 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
- 'only_matching': True,
- }, {
- 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
- 'only_matching': True,
- }, {
- 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
- 'only_matching': True,
- }]
-
- _PARTNER_ID = '1926081'
- _UICONF_ID = '29375172'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- reference_id = mobj.group('reference_id')
- if reference_id:
- video_id = reference_id
- partner_id = self._PARTNER_ID
- ui_id = self._UICONF_ID
- else:
- video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))
-
- webpage, urlh = self._download_webpage_handle(url, video_id)
-
- mobj = re.match(self._VALID_URL, urlh.geturl())
- reference_id = mobj.group('reference_id')
- if not reference_id:
- reference_id = self._search_regex(
- r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
- webpage, 'kaltura reference id', group='id')
- partner_id = self._search_regex(
- r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
- webpage, 'kaltura widget id', default=self._PARTNER_ID,
- group='id')
- ui_id = self._search_regex(
- r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
- webpage, 'kaltura uiconf id', default=self._UICONF_ID,
- group='id')
-
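- # Playback is delegated to the generic Kaltura embed player; pass
- # the reference id and, when logged in, a Kaltura session token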
- query = {
- 'wid': '_%s' % partner_id,
- 'uiconf_id': ui_id,
- 'flashvars[referenceId]': reference_id,
- }
-
- if self.LOGGED_IN:
- kaltura_session = self._download_json(
- '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
- video_id, 'Downloading kaltura session JSON',
- 'Unable to download kaltura session JSON', fatal=False)
- if kaltura_session:
- session = kaltura_session.get('session')
- if session:
- query['flashvars[ks]'] = session
-
- return self.url_result(update_url_query(
- 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
- 'Kaltura')
-
-
-class SafariApiIE(SafariBaseIE):
- IE_NAME = 'safari:api'
- _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
-
- _TESTS = [{
- 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- part = self._download_json(
- url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')),
- 'Downloading part JSON')
- return self.url_result(part['web_url'], SafariIE.ie_key())
-
-
-class SafariCourseIE(SafariBaseIE):
- IE_NAME = 'safari:course'
- IE_DESC = 'safaribooksonline.com online courses'
-
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
- (?:
- library/view/[^/]+|
- api/v1/book|
- videos/[^/]+
- )|
- techbus\.safaribooksonline\.com
- )
- /(?P<id>[^/]+)
- '''
-
- _TESTS = [{
- 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
- 'info_dict': {
- 'id': '9780133392838',
- 'title': 'Hadoop Fundamentals LiveLessons',
- },
- 'playlist_count': 22,
- 'skip': 'Requires safaribooksonline account credentials',
- }, {
- 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
- 'only_matching': True,
- }, {
- 'url': 'http://techbus.safaribooksonline.com/9780134426365',
- 'only_matching': True,
- }, {
- 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
- 'only_matching': True,
- }, {
- 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
- 'only_matching': True,
- }, {
- 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url)
- else super(SafariCourseIE, cls).suitable(url))
-
- def _real_extract(self, url):
- course_id = self._match_id(url)
-
- course_json = self._download_json(
- '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
- course_id, 'Downloading course JSON')
-
- if 'chapters' not in course_json:
- raise ExtractorError(
- 'No chapters found for course %s' % course_id, expected=True)
-
- entries = [
- self.url_result(chapter, SafariApiIE.ie_key())
- for chapter in course_json['chapters']]
-
- course_title = course_json['title']
-
- return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py
deleted file mode 100644
index 8b3275735..000000000
--- a/youtube_dl/extractor/scrippsnetworks.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import hashlib
-import re
-
-from .aws import AWSIE
-from .anvato import AnvatoIE
-from ..utils import (
- smuggle_url,
- urlencode_postdata,
- xpath_text,
-)
-
-
-class ScrippsNetworksWatchIE(AWSIE):
- IE_NAME = 'scrippsnetworks:watch'
- _VALID_URL = r'''(?x)
- https?://
- watch\.
- (?P<site>geniuskitchen)\.com/
- (?:
- player\.[A-Z0-9]+\.html\#|
- show/(?:[^/]+/){2}|
- player/
- )
- (?P<id>\d+)
- '''
- _TESTS = [{
- 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
- 'info_dict': {
- 'id': '4194875',
- 'ext': 'mp4',
- 'title': 'Ample Hills Ice Cream Bike',
- 'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
- 'uploader': 'ANV',
- 'upload_date': '20171011',
- 'timestamp': 1507698000,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [AnvatoIE.ie_key()],
- }]
-
- _SNI_TABLE = {
- 'geniuskitchen': 'genius',
- }
-
- _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
- _AWS_PROXY_HOST = 'web.api.video.snidigital.com'
-
- _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- site_id, video_id = mobj.group('site', 'id')
-
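- # Three-step anonymous AWS auth: fetch an OpenID token for a fixed
- # Cognito identity, trade it via STS AssumeRoleWithWebIdentity for
- # temporary signing credentials, then sign the SNI API request that
- # yields the Anvato mcpId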
- aws_identity_id_json = json.dumps({
- 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
- }).encode('utf-8')
- token = self._download_json(
- 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
- data=aws_identity_id_json,
- headers={
- 'Accept': '*/*',
- 'Content-Type': 'application/x-amz-json-1.1',
- 'Referer': url,
- 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
- 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
- 'X-Amz-User-Agent': self._AWS_USER_AGENT,
- })['Token']
-
- sts = self._download_xml(
- 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
- 'Action': 'AssumeRoleWithWebIdentity',
- 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
- 'RoleSessionName': 'web-identity',
- 'Version': '2011-06-15',
- 'WebIdentityToken': token,
- }), headers={
- 'Referer': url,
- 'X-Amz-User-Agent': self._AWS_USER_AGENT,
- 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
- })
-
- def get(key):
- return xpath_text(
- sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
- fatal=True)
-
- mcp_id = self._aws_execute_api({
- 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
- 'access_key': get('AccessKeyId'),
- 'secret_key': get('SecretAccessKey'),
- 'session_token': get('SessionToken'),
- }, video_id)['results'][0]['mcpId']
-
- return self.url_result(
- smuggle_url(
- 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
- {'geo_countries': ['US']}),
- AnvatoIE.ie_key(), video_id=mcp_id)
diff --git a/youtube_dl/extractor/seeker.py b/youtube_dl/extractor/seeker.py
deleted file mode 100644
index 3b9c65e7e..000000000
--- a/youtube_dl/extractor/seeker.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class SeekerIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
- _TESTS = [{
- # player.loadRevision3Item
- 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
- 'md5': '30c1dc4030cc715cf05b423d0947ac18',
- 'info_dict': {
- 'id': '76243',
- 'ext': 'webm',
- 'title': 'Should Trump Be Required To Release His Tax Returns?',
- 'description': 'Donald Trump has been secretive about his "big," "beautiful" tax returns. So what can we learn if he decides to release them?',
- 'uploader': 'Seeker Daily',
- 'uploader_id': 'seekerdaily',
- }
- }, {
- 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
- 'playlist': [
- {
- 'md5': '83bcd157cab89ad7318dd7b8c9cf1306',
- 'info_dict': {
- 'id': '67558',
- 'ext': 'mp4',
- 'title': 'The Pros & Cons Of Zoos',
- 'description': 'Zoos are often depicted as a terrible place for animals to live, but is there any truth to this?',
- 'uploader': 'DNews',
- 'uploader_id': 'dnews',
- },
- }
- ],
- 'info_dict': {
- 'id': '1834116536',
- 'title': 'After Gorilla Killing, Changes Ahead for Zoos',
- 'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
- },
- }]
-
- def _real_extract(self, url):
- display_id, article_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(url, display_id)
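- # Pages embed videos either via a player.loadRevision3Item() call
- # or as seekernetwork embed iframes; both map to Revision3Embed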
- mobj = re.search(r"player\.loadRevision3Item\('([^']+)'\s*,\s*(\d+)\);", webpage)
- if mobj:
- playlist_type, playlist_id = mobj.groups()
- return self.url_result(
- 'revision3:%s:%s' % (playlist_type, playlist_id), 'Revision3Embed', playlist_id)
- else:
- entries = [self.url_result('revision3:video_id:%s' % video_id, 'Revision3Embed', video_id) for video_id in re.findall(
- r'<iframe[^>]+src=[\'"](?:https?:)?//api\.seekernetwork\.com/player/embed\?videoId=(\d+)', webpage)]
- return self.playlist_result(
- entries, article_id, self._og_search_title(webpage), self._og_search_description(webpage))
diff --git a/youtube_dl/extractor/servingsys.py b/youtube_dl/extractor/servingsys.py
deleted file mode 100644
index c013d678f..000000000
--- a/youtube_dl/extractor/servingsys.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
-)
-
-
-class ServingSysIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^.]+\.)?serving-sys\.com/BurstingPipe/adServer\.bs\?.*?&pli=(?P<id>[0-9]+)'
-
- _TEST = {
- 'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?',
- 'info_dict': {
- 'id': '5349193',
- 'title': 'AdAPPter_Hyundai_demo',
- },
- 'playlist': [{
- 'md5': 'baed851342df6846eb8677a60a011a0f',
- 'info_dict': {
- 'id': '29955898',
- 'ext': 'flv',
- 'title': 'AdAPPter_Hyundai_demo (1)',
- 'duration': 74,
- 'tbr': 1378,
- 'width': 640,
- 'height': 400,
- },
- }, {
- 'md5': '979b4da2655c4bc2d81aeb915a8c5014',
- 'info_dict': {
- 'id': '29907998',
- 'ext': 'flv',
- 'title': 'AdAPPter_Hyundai_demo (2)',
- 'duration': 34,
- 'width': 854,
- 'height': 480,
- 'tbr': 516,
- },
- }],
- 'params': {
- 'playlistend': 2,
- },
- '_skip': 'Blocked in the US [sic]',
- }
-
- def _real_extract(self, url):
- pl_id = self._match_id(url)
- vast_doc = self._download_xml(url, pl_id)
-
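- # The URL returns a VAST document; the actual asset list lives in a
- # second XML document referenced by the MediaFile URL's adData parameter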
- title = vast_doc.find('.//AdTitle').text
- media = vast_doc.find('.//MediaFile').text
- info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL')
-
- doc = self._download_xml(info_url, pl_id, 'Downloading video info')
- entries = [{
- '_type': 'video',
- 'id': a.attrib['id'],
- 'title': '%s (%s)' % (title, a.attrib['assetID']),
- 'url': a.attrib['URL'],
- 'duration': int_or_none(a.attrib.get('length')),
- 'tbr': int_or_none(a.attrib.get('bitrate')),
- 'height': int_or_none(a.attrib.get('height')),
- 'width': int_or_none(a.attrib.get('width')),
- } for a in doc.findall('.//AdditionalAssets/asset')]
-
- return {
- '_type': 'playlist',
- 'id': pl_id,
- 'title': title,
- 'entries': entries,
- }
diff --git a/youtube_dl/extractor/servus.py b/youtube_dl/extractor/servus.py
deleted file mode 100644
index e579d42cf..000000000
--- a/youtube_dl/extractor/servus.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class ServusIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)/(?P<id>[aA]{2}-\w+|\d+-\d+)'
- _TESTS = [{
- 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
- 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
- 'info_dict': {
- 'id': 'AA-1T6VBU5PW1W12',
- 'ext': 'mp4',
- 'title': 'Die Grünen aus Sicht des Volkes',
- 'description': 'md5:1247204d85783afe3682644398ff2ec4',
- 'thumbnail': r're:^https?://.*\.jpg',
- }
- }, {
- 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url).upper()
- webpage = self._download_webpage(url, video_id)
-
- title = self._search_regex(
- (r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
- r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
- webpage, 'title', default=None,
- group='title') or self._og_search_title(webpage)
- title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
- description = self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
-
- formats = self._extract_m3u8_formats(
- 'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
- video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py
deleted file mode 100644
index ff575f592..000000000
--- a/youtube_dl/extractor/shared.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_b64decode
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- KNOWN_EXTENSIONS,
- parse_filesize,
- url_or_none,
- urlencode_postdata,
-)
-
-
-class SharedBaseIE(InfoExtractor):
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage, urlh = self._download_webpage_handle(url, video_id)
-
- if self._FILE_NOT_FOUND in webpage:
- raise ExtractorError(
- 'Video %s does not exist' % video_id, expected=True)
-
- video_url = self._extract_video_url(webpage, video_id, url)
-
- title = self._extract_title(webpage)
- filesize = int_or_none(self._extract_filesize(webpage))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
- 'filesize': filesize,
- 'title': title,
- }
-
- def _extract_title(self, webpage):
- return compat_b64decode(self._html_search_meta(
- 'full:title', webpage, 'title')).decode('utf-8')
-
- def _extract_filesize(self, webpage):
- return self._html_search_meta(
- 'full:size', webpage, 'file size', fatal=False)
-
-
-class SharedIE(SharedBaseIE):
- IE_DESC = 'shared.sx'
- _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
- _FILE_NOT_FOUND = '>File does not exist<'
-
- _TEST = {
- 'url': 'http://shared.sx/0060718775',
- 'md5': '106fefed92a8a2adb8c98e6a0652f49b',
- 'info_dict': {
- 'id': '0060718775',
- 'ext': 'mp4',
- 'title': 'Bmp4',
- 'filesize': 1720110,
- },
- }
-
- def _extract_video_url(self, webpage, video_id, url):
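- # The real download URL is only revealed after POSTing the page's
- # hidden form fields back to the same URL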
- download_form = self._hidden_inputs(webpage)
-
- video_page = self._download_webpage(
- url, video_id, 'Downloading video page',
- data=urlencode_postdata(download_form),
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'Referer': url,
- })
-
- video_url = self._html_search_regex(
- r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
- video_page, 'video URL', group='url')
-
- return video_url
-
-
-class VivoIE(SharedBaseIE):
- IE_DESC = 'vivo.sx'
- _VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
- _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
-
- _TEST = {
- 'url': 'http://vivo.sx/d7ddda0e78',
- 'md5': '15b3af41be0b4fe01f4df075c2678b2c',
- 'info_dict': {
- 'id': 'd7ddda0e78',
- 'ext': 'mp4',
- 'title': 'Chicken',
- 'filesize': 515659,
- },
- }
-
- def _extract_title(self, webpage):
- title = self._html_search_regex(
- r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
- 'title', default=None, group='title')
- if title:
- ext = determine_ext(title)
- if ext.lower() in KNOWN_EXTENSIONS:
- title = title.rpartition('.' + ext)[0]
- return title
- return self._og_search_title(webpage)
-
- def _extract_filesize(self, webpage):
- return parse_filesize(self._search_regex(
- r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
- webpage, 'filesize', fatal=False))
-
- def _extract_video_url(self, webpage, video_id, url):
- def decode_url(encoded_url):
- return compat_b64decode(encoded_url).decode('utf-8')
-
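- # The stream URL is base64-encoded, either inline in a data-stream
- # attribute or as the first element of an InitializeStream() call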
- stream_url = url_or_none(decode_url(self._search_regex(
- r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'stream url', default=None, group='url')))
- if stream_url:
- return stream_url
- return self._parse_json(
- self._search_regex(
- r'InitializeStream\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'stream', group='url'),
- video_id, transform_source=decode_url)[0]
diff --git a/youtube_dl/extractor/slideslive.py b/youtube_dl/extractor/slideslive.py
deleted file mode 100644
index ed84322c5..000000000
--- a/youtube_dl/extractor/slideslive.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import ExtractorError
-
-
-class SlidesLiveIE(InfoExtractor):
- _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
- _TESTS = [{
- # video_service_name = YOUTUBE
- 'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
- 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
- 'info_dict': {
- 'id': 'LMtgR8ba0b0',
- 'ext': 'mp4',
- 'title': '38902413: external video',
- 'description': '3890241320170925-9-1yd6ech.mp4',
- 'uploader': 'SlidesLive Administrator',
- 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
- 'upload_date': '20170925',
- }
- }, {
- # video_service_name = youtube
- 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
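- # Requesting the page URL with an Accept: application/json header
- # returns the video metadata instead of HTML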
- video_data = self._download_json(
- url, video_id, headers={'Accept': 'application/json'})
- service_name = video_data['video_service_name'].lower()
- if service_name == 'youtube':
- yt_video_id = video_data['video_service_id']
- return self.url_result(yt_video_id, 'Youtube', video_id=yt_video_id)
- else:
- raise ExtractorError(
- 'Unsupported service name: {0}'.format(service_name), expected=True)
diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py
deleted file mode 100644
index a62ed84f1..000000000
--- a/youtube_dl/extractor/sohu.py
+++ /dev/null
@@ -1,202 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse_urlencode,
-)
-from ..utils import (
- ExtractorError,
- int_or_none,
- try_get,
-)
-
-
-class SohuIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
-
- # Sohu videos give different MD5 sums on Travis CI and my machine
- _TESTS = [{
- 'note': 'This video is available only in Mainland China',
- 'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
- 'info_dict': {
- 'id': '382479172',
- 'ext': 'mp4',
- 'title': 'MV:Far East Movement《The Illest》',
- },
- 'skip': 'Only available in China',
- }, {
- 'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
- 'info_dict': {
- 'id': '409385080',
- 'ext': 'mp4',
- 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
- }
- }, {
- 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
- 'info_dict': {
- 'id': '78693464',
- 'ext': 'mp4',
- 'title': '【爱范品】第31期:MWC见不到的奇葩手机',
- }
- }, {
- 'note': 'Multipart video',
- 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
- 'info_dict': {
- 'id': '78910339',
- 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': '78910339_part1',
- 'ext': 'mp4',
- 'duration': 294,
- 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
- }
- }, {
- 'info_dict': {
- 'id': '78910339_part2',
- 'ext': 'mp4',
- 'duration': 300,
- 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
- }
- }, {
- 'info_dict': {
- 'id': '78910339_part3',
- 'ext': 'mp4',
- 'duration': 150,
- 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
- }
- }]
- }, {
- 'note': 'Video with title containing dash',
- 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
- 'info_dict': {
- 'id': '78932792',
- 'ext': 'mp4',
- 'title': 'youtube-dl testing video',
- },
- 'params': {
- 'skip_download': True
- }
- }]
-
- def _real_extract(self, url):
-
- def _fetch_data(vid_id, mytv=False):
- if mytv:
- base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
- else:
- base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
-
- return self._download_json(
- base_data_url + vid_id, video_id,
- 'Downloading JSON data for %s' % vid_id,
- headers=self.geo_verification_headers())
-
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- mytv = mobj.group('mytv') is not None
-
- webpage = self._download_webpage(url, video_id)
-
- title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
-
- vid = self._html_search_regex(
- r'var vid ?= ?["\'](\d+)["\']',
- webpage, 'video path')
- vid_data = _fetch_data(vid, mytv)
- if vid_data['play'] != 1:
- if vid_data.get('status') == 12:
- raise ExtractorError(
- '%s said: There\'s something wrong with the video.' % self.IE_NAME,
- expected=True)
- else:
- self.raise_geo_restricted(
- '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME)
-
- formats_json = {}
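- # Each quality level has its own vid; fetch its metadata separately,
- # reusing the main response when the ids match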
- for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
- vid_id = vid_data['data'].get('%sVid' % format_id)
- if not vid_id:
- continue
- vid_id = compat_str(vid_id)
- formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
-
- part_count = vid_data['data']['totalBlocks']
-
- playlist = []
- for i in range(part_count):
- formats = []
- for format_id, format_data in formats_json.items():
- allot = format_data['allot']
-
- data = format_data['data']
- clips_url = data['clipsURL']
- su = data['su']
-
- video_url = 'newflv.sohu.ccgslb.net'
- cdnId = None
- retries = 0
-
- while 'newflv.sohu.ccgslb.net' in video_url:
- params = {
- 'prot': 9,
- 'file': clips_url[i],
- 'new': su[i],
- 'prod': 'flash',
- 'rb': 1,
- }
-
- if cdnId is not None:
- params['idc'] = cdnId
-
- download_note = 'Downloading %s video URL part %d of %d' % (
- format_id, i + 1, part_count)
-
- if retries > 0:
- download_note += ' (retry #%d)' % retries
- part_info = self._parse_json(self._download_webpage(
- 'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)),
- video_id, download_note), video_id)
-
- video_url = part_info['url']
- cdnId = part_info.get('nid')
-
- retries += 1
- if retries > 5:
- raise ExtractorError('Failed to get video URL')
-
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- 'filesize': int_or_none(
- try_get(data, lambda x: x['clipsBytes'][i])),
- 'width': int_or_none(data.get('width')),
- 'height': int_or_none(data.get('height')),
- 'fps': int_or_none(data.get('fps')),
- })
- self._sort_formats(formats)
-
- playlist.append({
- 'id': '%s_part%d' % (video_id, i + 1),
- 'title': title,
- 'duration': vid_data['data']['clipsDuration'][i],
- 'formats': formats,
- })
-
- if len(playlist) == 1:
- info = playlist[0]
- info['id'] = video_id
- else:
- info = {
- '_type': 'multi_video',
- 'entries': playlist,
- 'id': video_id,
- 'title': title,
- }
-
- return info
diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py
deleted file mode 100644
index 05538f3d6..000000000
--- a/youtube_dl/extractor/soundcloud.py
+++ /dev/null
@@ -1,795 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
-import re
-
-from .common import (
- InfoExtractor,
- SearchInfoExtractor
-)
-from ..compat import (
- compat_str,
- compat_urlparse,
- compat_urllib_parse_urlencode,
-)
-from ..utils import (
- ExtractorError,
- float_or_none,
- int_or_none,
- KNOWN_EXTENSIONS,
- merge_dicts,
- mimetype2ext,
- str_or_none,
- try_get,
- unified_timestamp,
- update_url_query,
- url_or_none,
-)
-
-
-class SoundcloudIE(InfoExtractor):
- """Information extractor for soundcloud.com
- To access the media, the uid of the song and a stream token
- must be extracted from the page source and the script must make
- a request to media.soundcloud.com/crossdomain.xml. Then
- the media can be grabbed by requesting from a URL composed
- of the stream token and uid.
- """
-
- _VALID_URL = r'''(?x)^(?:https?://)?
- (?:(?:(?:www\.|m\.)?soundcloud\.com/
- (?!stations/track)
- (?P<uploader>[\w\d-]+)/
- (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
- (?P<title>[\w\d-]+)/?
- (?P<token>[^?]+?)?(?:[?].*)?$)
- |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
- (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
- |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
- )
- '''
- IE_NAME = 'soundcloud'
- _TESTS = [
- {
- 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
- 'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
- 'info_dict': {
- 'id': '62986583',
- 'ext': 'mp3',
- 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
- 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
- 'uploader': 'E.T. ExTerrestrial Music',
- 'timestamp': 1349920598,
- 'upload_date': '20121011',
- 'duration': 143.216,
- 'license': 'all-rights-reserved',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- }
- },
- # not streamable song
- {
- 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
- 'info_dict': {
- 'id': '47127627',
- 'ext': 'mp3',
- 'title': 'Goldrushed',
- 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
- 'uploader': 'The Royal Concept',
- 'timestamp': 1337635207,
- 'upload_date': '20120521',
- 'duration': 30,
- 'license': 'all-rights-reserved',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- 'params': {
- # rtmp
- 'skip_download': True,
- },
- },
- # private link
- {
- 'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
- 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
- 'info_dict': {
- 'id': '123998367',
- 'ext': 'mp3',
- 'title': 'Youtube - Dl Test Video \'\' Ä↭',
- 'description': 'test chars: \"\'/\\ä↭',
- 'uploader': 'jaimeMF',
- 'timestamp': 1386604920,
- 'upload_date': '20131209',
- 'duration': 9.927,
- 'license': 'all-rights-reserved',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- },
- # private link (alt format)
- {
- 'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
- 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
- 'info_dict': {
- 'id': '123998367',
- 'ext': 'mp3',
- 'title': 'Youtube - Dl Test Video \'\' Ä↭',
- 'description': 'test chars: \"\'/\\ä↭',
- 'uploader': 'jaimeMF',
- 'timestamp': 1386604920,
- 'upload_date': '20131209',
- 'duration': 9.927,
- 'license': 'all-rights-reserved',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- },
- # downloadable song
- {
- 'url': 'https://soundcloud.com/oddsamples/bus-brakes',
- 'md5': '7624f2351f8a3b2e7cd51522496e7631',
- 'info_dict': {
- 'id': '128590877',
- 'ext': 'mp3',
- 'title': 'Bus Brakes',
- 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
- 'uploader': 'oddsamples',
- 'timestamp': 1389232924,
- 'upload_date': '20140109',
- 'duration': 17.346,
- 'license': 'cc-by-sa',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- },
- # private link, downloadable format
- {
- 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
- 'md5': '64a60b16e617d41d0bef032b7f55441e',
- 'info_dict': {
- 'id': '340344461',
- 'ext': 'wav',
- 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
- 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
- 'uploader': 'Ori Uplift Music',
- 'timestamp': 1504206263,
- 'upload_date': '20170831',
- 'duration': 7449.096,
- 'license': 'all-rights-reserved',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- },
- # no album art, use avatar pic for thumbnail
- {
- 'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
- 'md5': '59c7872bc44e5d99b7211891664760c2',
- 'info_dict': {
- 'id': '309699954',
- 'ext': 'mp3',
- 'title': 'Sideways (Prod. Mad Real)',
- 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
- 'uploader': 'garyvee',
- 'timestamp': 1488152409,
- 'upload_date': '20170226',
- 'duration': 207.012,
- 'thumbnail': r're:https?://.*\.jpg',
- 'license': 'all-rights-reserved',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- # not available via api.soundcloud.com/i1/tracks/id/streams
- {
- 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
- 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
- 'info_dict': {
- 'id': '583011102',
- 'ext': 'mp3',
- 'title': 'Mezzo Valzer',
- 'description': 'md5:4138d582f81866a530317bae316e8b61',
- 'uploader': 'Giovanni Sarani',
- 'timestamp': 1551394171,
- 'upload_date': '20190228',
- 'duration': 180.157,
- 'thumbnail': r're:https?://.*\.jpg',
- 'license': 'all-rights-reserved',
- 'view_count': int,
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- 'expected_warnings': ['Unable to download JSON metadata'],
- }
- ]
-
- _CLIENT_ID = 'BeGVhOrGmfboy1LtiHTQF6Ejpt9ULJCI'
-
- @staticmethod
- def _extract_urls(webpage):
- return [m.group('url') for m in re.finditer(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
- webpage)]
-
- @classmethod
- def _resolv_url(cls, url):
- return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID
-
- def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
- track_id = compat_str(info['id'])
- title = info['title']
- name = full_title or track_id
- if quiet:
- self.report_extraction(name)
- thumbnail = info.get('artwork_url') or info.get('user', {}).get('avatar_url')
- if isinstance(thumbnail, compat_str):
- thumbnail = thumbnail.replace('-large', '-t500x500')
- username = try_get(info, lambda x: x['user']['username'], compat_str)
-
- def extract_count(key):
- return int_or_none(info.get('%s_count' % key))
-
- like_count = extract_count('favoritings')
- if like_count is None:
- like_count = extract_count('likes')
-
- result = {
- 'id': track_id,
- 'uploader': username,
- 'timestamp': unified_timestamp(info.get('created_at')),
- 'title': title,
- 'description': info.get('description'),
- 'thumbnail': thumbnail,
- 'duration': float_or_none(info.get('duration'), 1000),
- 'webpage_url': info.get('permalink_url'),
- 'license': info.get('license'),
- 'view_count': extract_count('playback'),
- 'like_count': like_count,
- 'comment_count': extract_count('comment'),
- 'repost_count': extract_count('reposts'),
- 'genre': info.get('genre'),
- }
-
- format_urls = set()
- formats = []
- query = {'client_id': self._CLIENT_ID}
- if secret_token is not None:
- query['secret_token'] = secret_token
- if info.get('downloadable', False):
- # We can build a direct link to the song
- format_url = update_url_query(
- 'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
- format_urls.add(format_url)
- formats.append({
- 'format_id': 'download',
- 'ext': info.get('original_format', 'mp3'),
- 'url': format_url,
- 'vcodec': 'none',
- 'preference': 10,
- })
-
- # Old API, does not work for some tracks (e.g.
- # https://soundcloud.com/giovannisarani/mezzo-valzer)
- format_dict = self._download_json(
- 'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
- track_id, 'Downloading track url', query=query, fatal=False)
-
- if format_dict:
- for key, stream_url in format_dict.items():
- if stream_url in format_urls:
- continue
- format_urls.add(stream_url)
- ext, abr = 'mp3', None
- mobj = re.search(r'_([^_]+)_(\d+)_url', key)
- if mobj:
- ext, abr = mobj.groups()
- abr = int(abr)
- if key.startswith('http'):
- stream_formats = [{
- 'format_id': key,
- 'ext': ext,
- 'url': stream_url,
- }]
- elif key.startswith('rtmp'):
-                    # The URL doesn't include an RTMP app; extract the playpath manually
- url, path = stream_url.split('mp3:', 1)
- stream_formats = [{
- 'format_id': key,
- 'url': url,
- 'play_path': 'mp3:' + path,
- 'ext': 'flv',
- }]
- elif key.startswith('hls'):
- stream_formats = self._extract_m3u8_formats(
- stream_url, track_id, ext, entry_protocol='m3u8_native',
- m3u8_id=key, fatal=False)
- else:
- continue
-
- if abr:
- for f in stream_formats:
- f['abr'] = abr
-
- formats.extend(stream_formats)
-
- # New API
- transcodings = try_get(
- info, lambda x: x['media']['transcodings'], list) or []
- for t in transcodings:
- if not isinstance(t, dict):
- continue
- format_url = url_or_none(t.get('url'))
- if not format_url:
- continue
- stream = self._download_json(
- update_url_query(format_url, query), track_id, fatal=False)
- if not isinstance(stream, dict):
- continue
- stream_url = url_or_none(stream.get('url'))
- if not stream_url:
- continue
- if stream_url in format_urls:
- continue
- format_urls.add(stream_url)
- protocol = try_get(t, lambda x: x['format']['protocol'], compat_str)
- if protocol != 'hls' and '/hls' in format_url:
- protocol = 'hls'
- ext = None
- preset = str_or_none(t.get('preset'))
- if preset:
- ext = preset.split('_')[0]
- if ext not in KNOWN_EXTENSIONS:
- mimetype = try_get(
- t, lambda x: x['format']['mime_type'], compat_str)
- ext = mimetype2ext(mimetype) or 'mp3'
- format_id_list = []
- if protocol:
- format_id_list.append(protocol)
- format_id_list.append(ext)
- format_id = '_'.join(format_id_list)
- formats.append({
- 'url': stream_url,
- 'format_id': format_id,
- 'ext': ext,
- 'protocol': 'm3u8_native' if protocol == 'hls' else 'http',
- })
-
- if not formats:
-            # Fall back to the stream_url from the original info; this
-            # cannot always be used, as it sometimes returns an HTTP 404 error
- formats.append({
- 'format_id': 'fallback',
- 'url': update_url_query(info['stream_url'], query),
- 'ext': 'mp3',
- })
- self._check_formats(formats, track_id)
-
- for f in formats:
- f['vcodec'] = 'none'
-
- self._sort_formats(formats)
- result['formats'] = formats
-
- return result
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
-
- track_id = mobj.group('track_id')
- new_info = {}
-
- if track_id is not None:
- info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
- full_title = track_id
- token = mobj.group('secret_token')
- if token:
- info_json_url += '&secret_token=' + token
- elif mobj.group('player'):
- query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- real_url = query['url'][0]
- # If the token is in the query of the original url we have to
- # manually add it
- if 'secret_token' in query:
- real_url += '?secret_token=' + query['secret_token'][0]
- return self.url_result(real_url)
- else:
- # extract uploader (which is in the url)
- uploader = mobj.group('uploader')
- # extract simple title (uploader + slug of song title)
- slug_title = mobj.group('title')
- token = mobj.group('token')
- full_title = resolve_title = '%s/%s' % (uploader, slug_title)
- if token:
- resolve_title += '/%s' % token
-
- webpage = self._download_webpage(url, full_title, fatal=False)
- if webpage:
- entries = self._parse_json(
- self._search_regex(
- r'var\s+c\s*=\s*(\[.+?\])\s*,\s*o\s*=Date\b', webpage,
- 'data', default='[]'), full_title, fatal=False)
- if entries:
- for e in entries:
- if not isinstance(e, dict):
- continue
- if e.get('id') != 67:
- continue
- data = try_get(e, lambda x: x['data'][0], dict)
- if data:
- new_info = data
- break
- info_json_url = self._resolv_url(
- 'https://soundcloud.com/%s' % resolve_title)
-
- # Contains some additional info missing from new_info
- info = self._download_json(
- info_json_url, full_title, 'Downloading info JSON')
-
- return self._extract_info_dict(
- merge_dicts(info, new_info), full_title, secret_token=token)
-
-
-class SoundcloudPlaylistBaseIE(SoundcloudIE):
- @staticmethod
- def _extract_id(e):
- return compat_str(e['id']) if e.get('id') else None
-
- def _extract_track_entries(self, tracks):
- return [
- self.url_result(
- track['permalink_url'], SoundcloudIE.ie_key(),
- video_id=self._extract_id(track))
- for track in tracks if track.get('permalink_url')]
-
-
-class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
- _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
- IE_NAME = 'soundcloud:set'
- _TESTS = [{
- 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
- 'info_dict': {
- 'id': '2284613',
- 'title': 'The Royal Concept EP',
- },
- 'playlist_mincount': 5,
- }, {
- 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- # extract uploader (which is in the url)
- uploader = mobj.group('uploader')
- # extract simple title (uploader + slug of song title)
- slug_title = mobj.group('slug_title')
- full_title = '%s/sets/%s' % (uploader, slug_title)
- url = 'https://soundcloud.com/%s/sets/%s' % (uploader, slug_title)
-
- token = mobj.group('token')
- if token:
- full_title += '/' + token
- url += '/' + token
-
- resolv_url = self._resolv_url(url)
- info = self._download_json(resolv_url, full_title)
-
- if 'errors' in info:
- msgs = (compat_str(err['error_message']) for err in info['errors'])
- raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
-
- entries = self._extract_track_entries(info['tracks'])
-
- return {
- '_type': 'playlist',
- 'entries': entries,
- 'id': '%s' % info['id'],
- 'title': info['title'],
- }
-
-
-class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
- _API_V2_BASE = 'https://api-v2.soundcloud.com'
-
- def _extract_playlist(self, base_url, playlist_id, playlist_title):
- COMMON_QUERY = {
- 'limit': 50,
- 'client_id': self._CLIENT_ID,
- 'linked_partitioning': '1',
- }
-
- query = COMMON_QUERY.copy()
- query['offset'] = 0
-
- next_href = base_url + '?' + compat_urllib_parse_urlencode(query)
-
- entries = []
- for i in itertools.count():
- response = self._download_json(
- next_href, playlist_id, 'Downloading track page %s' % (i + 1))
-
- collection = response['collection']
-
- if not isinstance(collection, list):
- collection = []
-
-            # An empty collection may be returned; in this case we proceed
-            # straight to next_href
-
- def resolve_entry(candidates):
- for cand in candidates:
- if not isinstance(cand, dict):
- continue
- permalink_url = url_or_none(cand.get('permalink_url'))
- if not permalink_url:
- continue
- return self.url_result(
- permalink_url,
- ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
- video_id=self._extract_id(cand),
- video_title=cand.get('title'))
-
- for e in collection:
- entry = resolve_entry((e, e.get('track'), e.get('playlist')))
- if entry:
- entries.append(entry)
-
- next_href = response.get('next_href')
- if not next_href:
- break
-
- parsed_next_href = compat_urlparse.urlparse(response['next_href'])
- qs = compat_urlparse.parse_qs(parsed_next_href.query)
- qs.update(COMMON_QUERY)
- next_href = compat_urlparse.urlunparse(
- parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))
-
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'title': playlist_title,
- 'entries': entries,
- }
-
-
-class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:(?:www|m)\.)?soundcloud\.com/
- (?P<user>[^/]+)
- (?:/
- (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
- )?
- /?(?:[?#].*)?$
- '''
- IE_NAME = 'soundcloud:user'
- _TESTS = [{
- 'url': 'https://soundcloud.com/soft-cell-official',
- 'info_dict': {
- 'id': '207965082',
- 'title': 'Soft Cell (All)',
- },
- 'playlist_mincount': 28,
- }, {
- 'url': 'https://soundcloud.com/soft-cell-official/tracks',
- 'info_dict': {
- 'id': '207965082',
- 'title': 'Soft Cell (Tracks)',
- },
- 'playlist_mincount': 27,
- }, {
- 'url': 'https://soundcloud.com/soft-cell-official/albums',
- 'info_dict': {
- 'id': '207965082',
- 'title': 'Soft Cell (Albums)',
- },
- 'playlist_mincount': 1,
- }, {
- 'url': 'https://soundcloud.com/jcv246/sets',
- 'info_dict': {
- 'id': '12982173',
- 'title': 'Jordi / cv (Playlists)',
- },
- 'playlist_mincount': 2,
- }, {
- 'url': 'https://soundcloud.com/jcv246/reposts',
- 'info_dict': {
- 'id': '12982173',
- 'title': 'Jordi / cv (Reposts)',
- },
- 'playlist_mincount': 6,
- }, {
- 'url': 'https://soundcloud.com/clalberg/likes',
- 'info_dict': {
- 'id': '11817582',
- 'title': 'clalberg (Likes)',
- },
- 'playlist_mincount': 5,
- }, {
- 'url': 'https://soundcloud.com/grynpyret/spotlight',
- 'info_dict': {
- 'id': '7098329',
- 'title': 'Grynpyret (Spotlight)',
- },
- 'playlist_mincount': 1,
- }]
-
- _BASE_URL_MAP = {
- 'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- 'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
- }
-
- _TITLE_MAP = {
- 'all': 'All',
- 'tracks': 'Tracks',
- 'albums': 'Albums',
- 'sets': 'Playlists',
- 'reposts': 'Reposts',
- 'likes': 'Likes',
- 'spotlight': 'Spotlight',
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- uploader = mobj.group('user')
-
- url = 'https://soundcloud.com/%s/' % uploader
- resolv_url = self._resolv_url(url)
- user = self._download_json(
- resolv_url, uploader, 'Downloading user info')
-
- resource = mobj.group('rsrc') or 'all'
-
- return self._extract_playlist(
- self._BASE_URL_MAP[resource] % user['id'], compat_str(user['id']),
- '%s (%s)' % (user['username'], self._TITLE_MAP[resource]))
-
-
-class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
- _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
- IE_NAME = 'soundcloud:trackstation'
- _TESTS = [{
- 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
- 'info_dict': {
- 'id': '286017854',
- 'title': 'Track station: your-text',
- },
- 'playlist_mincount': 47,
- }]
-
- def _real_extract(self, url):
- track_name = self._match_id(url)
-
- webpage = self._download_webpage(url, track_name)
-
- track_id = self._search_regex(
- r'soundcloud:track-stations:(\d+)', webpage, 'track id')
-
- return self._extract_playlist(
- '%s/stations/soundcloud:track-stations:%s/tracks'
- % (self._API_V2_BASE, track_id),
- track_id, 'Track station: %s' % track_name)
-
-
-class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
- _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
- IE_NAME = 'soundcloud:playlist'
- _TESTS = [{
- 'url': 'https://api.soundcloud.com/playlists/4110309',
- 'info_dict': {
- 'id': '4110309',
- 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
- 'description': 're:.*?TILT Brass - Bowery Poetry Club',
- },
- 'playlist_count': 6,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
- base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)
-
- data_dict = {
- 'client_id': self._CLIENT_ID,
- }
- token = mobj.group('token')
-
- if token:
- data_dict['secret_token'] = token
-
- data = compat_urllib_parse_urlencode(data_dict)
- data = self._download_json(
- base_url + data, playlist_id, 'Downloading playlist')
-
- entries = self._extract_track_entries(data['tracks'])
-
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'title': data.get('title'),
- 'description': data.get('description'),
- 'entries': entries,
- }
-
-
-class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
- IE_NAME = 'soundcloud:search'
- IE_DESC = 'Soundcloud search'
- _MAX_RESULTS = float('inf')
- _TESTS = [{
- 'url': 'scsearch15:post-avant jazzcore',
- 'info_dict': {
- 'title': 'post-avant jazzcore',
- },
- 'playlist_count': 15,
- }]
-
- _SEARCH_KEY = 'scsearch'
- _MAX_RESULTS_PER_PAGE = 200
- _DEFAULT_RESULTS_PER_PAGE = 50
- _API_V2_BASE = 'https://api-v2.soundcloud.com'
-
- def _get_collection(self, endpoint, collection_id, **query):
- limit = min(
- query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
- self._MAX_RESULTS_PER_PAGE)
- query['limit'] = limit
- query['client_id'] = self._CLIENT_ID
- query['linked_partitioning'] = '1'
- query['offset'] = 0
- data = compat_urllib_parse_urlencode(query)
- next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)
-
- collected_results = 0
-
- for i in itertools.count(1):
- response = self._download_json(
- next_url, collection_id, 'Downloading page {0}'.format(i),
- 'Unable to download API page')
-
- collection = response.get('collection', [])
- if not collection:
- break
-
- collection = list(filter(bool, collection))
- collected_results += len(collection)
-
- for item in collection:
- yield self.url_result(item['uri'], SoundcloudIE.ie_key())
-
- if not collection or collected_results >= limit:
- break
-
- next_url = response.get('next_href')
- if not next_url:
- break
-
- def _get_n_results(self, query, n):
- tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
- return self.playlist_result(tracks, playlist_title=query)
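
The legacy-API branch deleted above infers codec and bitrate from key names like http_mp3_128_url returned by the /i1/tracks/<id>/streams endpoint. A standalone sketch of just that key parsing (the sample keys mirror the ones the code handles; real responses may differ):

import re

def parse_stream_key(key):
    # Keys look like 'http_mp3_128_url' or 'hls_opus_64_url': transport
    # prefix, codec/extension, average bitrate. Returns (protocol, ext,
    # abr) with mp3/None as defaults, as in the loop above.
    ext, abr = 'mp3', None
    mobj = re.search(r'_([^_]+)_(\d+)_url', key)
    if mobj:
        ext, abr = mobj.group(1), int(mobj.group(2))
    protocol = key.split('_', 1)[0]
    return protocol, ext, abr

assert parse_stream_key('http_mp3_128_url') == ('http', 'mp3', 128)
assert parse_stream_key('hls_opus_64_url') == ('hls', 'opus', 64)
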
diff --git a/youtube_dl/extractor/spankbang.py b/youtube_dl/extractor/spankbang.py
deleted file mode 100644
index e040ada29..000000000
--- a/youtube_dl/extractor/spankbang.py
+++ /dev/null
@@ -1,180 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- merge_dicts,
- orderedSet,
- parse_duration,
- parse_resolution,
- str_to_int,
- url_or_none,
- urlencode_postdata,
-)
-
-
-class SpankBangIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
- _TESTS = [{
- 'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
- 'md5': '1cc433e1d6aa14bc376535b8679302f7',
- 'info_dict': {
- 'id': '3vvn',
- 'ext': 'mp4',
- 'title': 'fantasy solo',
- 'description': 'dillion harper masturbates on a bed',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'silly2587',
- 'timestamp': 1422571989,
- 'upload_date': '20150129',
- 'age_limit': 18,
- }
- }, {
- # 480p only
- 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
- 'only_matching': True,
- }, {
- # no uploader
- 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
- 'only_matching': True,
- }, {
- # mobile page
- 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
- 'only_matching': True,
- }, {
- # 4k
- 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
- 'only_matching': True,
- }, {
- 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
- 'only_matching': True,
- }, {
- 'url': 'https://m.spankbang.com/3vvn/play',
- 'only_matching': True,
- }, {
- 'url': 'https://spankbang.com/2y3td/embed/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(
- url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
- video_id, headers={'Cookie': 'country=US'})
-
- if re.search(r'<[^>]+\bid=["\']video_removed', webpage):
- raise ExtractorError(
- 'Video %s is not available' % video_id, expected=True)
-
- formats = []
-
- def extract_format(format_id, format_url):
- f_url = url_or_none(format_url)
- if not f_url:
- return
- f = parse_resolution(format_id)
- f.update({
- 'url': f_url,
- 'format_id': format_id,
- })
- formats.append(f)
-
- STREAM_URL_PREFIX = 'stream_url_'
-
- for mobj in re.finditer(
- r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
- % STREAM_URL_PREFIX, webpage):
-            extract_format(*mobj.group('id', 'url'))
-
- if not formats:
- stream_key = self._search_regex(
- r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
- webpage, 'stream key', group='value')
-
- sb_csrf_session = self._get_cookies(
- 'https://spankbang.com')['sb_csrf_session'].value
-
- stream = self._download_json(
- 'https://spankbang.com/api/videos/stream', video_id,
- 'Downloading stream JSON', data=urlencode_postdata({
- 'id': stream_key,
- 'data': 0,
- 'sb_csrf_session': sb_csrf_session,
- }), headers={
- 'Referer': url,
- 'X-CSRFToken': sb_csrf_session,
- })
-
- for format_id, format_url in stream.items():
- if format_id.startswith(STREAM_URL_PREFIX):
- if format_url and isinstance(format_url, list):
- format_url = format_url[0]
- extract_format(
- format_id[len(STREAM_URL_PREFIX):], format_url)
-
- self._sort_formats(formats)
-
- info = self._search_json_ld(webpage, video_id, default={})
-
- title = self._html_search_regex(
- r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
- description = self._search_regex(
- r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
- webpage, 'description', default=None)
- thumbnail = self._og_search_thumbnail(webpage, default=None)
- uploader = self._html_search_regex(
- (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
- r'class="user"[^>]*><img[^>]+>([^<]+)'),
- webpage, 'uploader', default=None)
- duration = parse_duration(self._search_regex(
- r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
- webpage, 'duration', default=None))
- view_count = str_to_int(self._search_regex(
- r'([\d,.]+)\s+plays', webpage, 'view count', default=None))
-
- age_limit = self._rta_search(webpage)
-
- return merge_dicts({
- 'id': video_id,
- 'title': title or video_id,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'duration': duration,
- 'view_count': view_count,
- 'formats': formats,
- 'age_limit': age_limit,
-        }, info)
-
-
-class SpankBangPlaylistIE(InfoExtractor):
- _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
- _TEST = {
- 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
- 'info_dict': {
- 'id': 'ug0k',
- 'title': 'Big Ass Titties',
- },
- 'playlist_mincount': 50,
- }
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(
- url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
-
- entries = [self.url_result(
- 'https://spankbang.com/%s/video' % video_id,
- ie=SpankBangIE.ie_key(), video_id=video_id)
- for video_id in orderedSet(re.findall(
- r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
-
- title = self._html_search_regex(
- r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
- fatal=False)
-
- return self.playlist_result(entries, playlist_id, title)
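
The primary format source in the deleted extractor is a set of JavaScript assignments of the form stream_url_<id> = '<url>'. A self-contained sketch of that scrape, reusing the regex above (the sample page snippet and URL are illustrative):

import re

STREAM_URL_PREFIX = 'stream_url_'

def extract_stream_vars(webpage):
    # Collect assignments such as  stream_url_480p = 'https://...';
    # into a {format_id: url} mapping.
    return {
        m.group('id'): m.group('url')
        for m in re.finditer(
            r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
            % STREAM_URL_PREFIX, webpage)
    }

page = "var stream_url_480p = 'https://cdn.example.com/v480.mp4';"
assert extract_stream_vars(page) == {'480p': 'https://cdn.example.com/v480.mp4'}
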
diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py
deleted file mode 100644
index 44d8fa52f..000000000
--- a/youtube_dl/extractor/spankwire.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_unquote,
- compat_urllib_parse_urlparse,
-)
-from ..utils import (
- sanitized_Request,
- str_to_int,
- unified_strdate,
-)
-from ..aes import aes_decrypt_text
-
-
-class SpankwireIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?P<url>spankwire\.com/[^/]*/video(?P<id>[0-9]+)/?)'
- _TESTS = [{
- # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
- 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
- 'md5': '8bbfde12b101204b39e4b9fe7eb67095',
- 'info_dict': {
- 'id': '103545',
- 'ext': 'mp4',
- 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
- 'description': 'Crazy Bitch X rated music video.',
- 'uploader': 'oreusz',
- 'uploader_id': '124697',
- 'upload_date': '20070507',
- 'age_limit': 18,
- }
- }, {
- # download URL pattern: */mp4_<format_id>_<video_id>.mp4
- 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
- 'md5': '09b3c20833308b736ae8902db2f8d7e6',
- 'info_dict': {
- 'id': '1921551',
- 'ext': 'mp4',
- 'title': 'Titcums Compiloation I',
- 'description': 'cum on tits',
- 'uploader': 'dannyh78999',
- 'uploader_id': '3056053',
- 'upload_date': '20150822',
- 'age_limit': 18,
- },
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- req = sanitized_Request('http://www.' + mobj.group('url'))
- req.add_header('Cookie', 'age_verified=1')
- webpage = self._download_webpage(req, video_id)
-
- title = self._html_search_regex(
- r'<h1>([^<]+)', webpage, 'title')
- description = self._html_search_regex(
- r'(?s)<div\s+id="descriptionContent">(.+?)</div>',
- webpage, 'description', fatal=False)
- thumbnail = self._html_search_regex(
- r'playerData\.screenShot\s*=\s*["\']([^"\']+)["\']',
- webpage, 'thumbnail', fatal=False)
-
- uploader = self._html_search_regex(
- r'by:\s*<a [^>]*>(.+?)</a>',
- webpage, 'uploader', fatal=False)
- uploader_id = self._html_search_regex(
- r'by:\s*<a href="/(?:user/viewProfile|Profile\.aspx)\?.*?UserId=(\d+).*?"',
- webpage, 'uploader id', fatal=False)
- upload_date = unified_strdate(self._html_search_regex(
- r'</a> on (.+?) at \d+:\d+',
- webpage, 'upload date', fatal=False))
-
- view_count = str_to_int(self._html_search_regex(
- r'<div id="viewsCounter"><span>([\d,\.]+)</span> views</div>',
- webpage, 'view count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r'<span\s+id="spCommentCount"[^>]*>([\d,\.]+)</span>',
- webpage, 'comment count', fatal=False))
-
- videos = re.findall(
- r'playerData\.cdnPath([0-9]{3,})\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage)
- heights = [int(video[0]) for video in videos]
- video_urls = list(map(compat_urllib_parse_unquote, [video[1] for video in videos]))
-        if re.search(r'flashvars\.encrypted\s*=\s*"true"', webpage):
- password = self._search_regex(
- r'flashvars\.video_title = "([^"]+)',
- webpage, 'password').replace('+', ' ')
- video_urls = list(map(
- lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'),
- video_urls))
-
- formats = []
- for height, video_url in zip(heights, video_urls):
- path = compat_urllib_parse_urlparse(video_url).path
- m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
- if m:
- tbr = int(m.group('tbr'))
- height = int(m.group('height'))
- else:
- tbr = None
- formats.append({
- 'url': video_url,
- 'format_id': '%dp' % height,
- 'height': height,
- 'tbr': tbr,
- })
- self._sort_formats(formats)
-
- age_limit = self._rta_search(webpage)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'upload_date': upload_date,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'formats': formats,
- 'age_limit': age_limit,
- }
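
The deleted extractor recovers height and total bitrate from the CDN path pattern */<height>P_<tbr>K_<video_id>.mp4 noted in the first test. A standalone sketch of that parsing, using stdlib urllib.parse instead of the compat shim (the sample URL is illustrative):

import re
from urllib.parse import urlparse

def parse_height_tbr(video_url, default_height):
    # Paths embed the vertical resolution and total bitrate, e.g.
    # '/103545/720P_4000K_103545.mp4'; fall back to the height scraped
    # from the player data when the pattern is absent.
    path = urlparse(video_url).path
    m = re.search(r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', path)
    if m:
        return int(m.group('height')), int(m.group('tbr'))
    return default_height, None

assert parse_height_tbr(
    'http://cdn.example.com/103545/720P_4000K_103545.mp4', 480) == (720, 4000)
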
diff --git a/youtube_dl/extractor/spike.py b/youtube_dl/extractor/spike.py
deleted file mode 100644
index 7c11ea7aa..000000000
--- a/youtube_dl/extractor/spike.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from __future__ import unicode_literals
-
-from .mtv import MTVServicesInfoExtractor
-
-
-class BellatorIE(MTVServicesInfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?bellator\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
- _TESTS = [{
- 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
- 'info_dict': {
- 'id': 'b55e434e-fde1-4a98-b7cc-92003a034de4',
- 'ext': 'mp4',
- 'title': 'Douglas Lima vs. Paul Daley - Round 1',
- 'description': 'md5:805a8dd29310fd611d32baba2f767885',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
- 'only_matching': True,
- }]
-
- _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
- _GEO_COUNTRIES = ['US']
-
-
-class ParamountNetworkIE(MTVServicesInfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
- _TESTS = [{
- 'url': 'http://www.paramountnetwork.com/episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-13',
- 'info_dict': {
- 'id': '37ace3a8-1df6-48be-85b8-38df8229e241',
- 'ext': 'mp4',
- 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1',
- 'description': 'md5:a739ca8f978a7802f67f8016d27ce114',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }]
-
- _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
- _GEO_COUNTRIES = ['US']
-
- def _extract_mgid(self, webpage):
- root_data = self._parse_json(self._search_regex(
- r'window\.__DATA__\s*=\s*({.+})',
- webpage, 'data'), None)
-
- def find_sub_data(data, data_type):
- return next(c for c in data['children'] if c.get('type') == data_type)
-
- c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
- return c['props']['media']['video']['config']['uri']
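
_extract_mgid above walks the page's window.__DATA__ tree by node type to reach the player config. A runnable sketch of that traversal against a minimal mock of the structure (the mock shape and mgid value are illustrative, inferred from the lookups in the code):

def find_sub_data(data, data_type):
    # Return the first child node whose 'type' matches, as in the
    # _extract_mgid helper above; raises StopIteration if absent.
    return next(c for c in data['children'] if c.get('type') == data_type)

root_data = {'children': [{'type': 'MainContainer', 'children': [
    {'type': 'VideoPlayer',
     'props': {'media': {'video': {'config': {'uri': 'mgid:arc:video:example'}}}}},
]}]}

player = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
assert player['props']['media']['video']['config']['uri'] == 'mgid:arc:video:example'
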
diff --git a/youtube_dl/extractor/sportdeutschland.py b/youtube_dl/extractor/sportdeutschland.py
deleted file mode 100644
index a3c35a899..000000000
--- a/youtube_dl/extractor/sportdeutschland.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- parse_iso8601,
- sanitized_Request,
-)
-
-
-class SportDeutschlandIE(InfoExtractor):
- _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
- _TESTS = [{
- 'url': 'http://sportdeutschland.tv/badminton/live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
- 'info_dict': {
- 'id': 'live-li-ning-badminton-weltmeisterschaft-2014-kopenhagen',
- 'ext': 'mp4',
- 'title': 're:Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen',
- 'categories': ['Badminton'],
- 'view_count': int,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'description': r're:Die Badminton-WM 2014 aus Kopenhagen bei Sportdeutschland\.TV',
- 'timestamp': int,
- 'upload_date': 're:^201408[23][0-9]$',
- },
- 'params': {
- 'skip_download': 'Live stream',
- },
- }, {
- 'url': 'http://sportdeutschland.tv/li-ning-badminton-wm-2014/lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
- 'info_dict': {
- 'id': 'lee-li-ning-badminton-weltmeisterschaft-2014-kopenhagen-herren-einzel-wei-vs',
- 'ext': 'mp4',
- 'upload_date': '20140825',
- 'description': 'md5:60a20536b57cee7d9a4ec005e8687504',
- 'timestamp': 1408976060,
- 'duration': 2732,
- 'title': 'Li-Ning Badminton Weltmeisterschaft 2014 Kopenhagen: Herren Einzel, Wei Lee vs. Keun Lee',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'view_count': int,
- 'categories': ['Li-Ning Badminton WM 2014'],
-
- }
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- sport_id = mobj.group('sport')
-
- api_url = 'http://proxy.vidibusdynamic.net/sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
- sport_id, video_id)
- req = sanitized_Request(api_url, headers={
- 'Accept': 'application/vnd.vidibus.v2.html+json',
- 'Referer': url,
- })
- data = self._download_json(req, video_id)
-
- asset = data['asset']
- categories = [data['section']['title']]
-
- formats = []
- smil_url = asset['video']
- if '.smil' in smil_url:
- m3u8_url = smil_url.replace('.smil', '.m3u8')
- formats.extend(
- self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
-
- smil_doc = self._download_xml(
- smil_url, video_id, note='Downloading SMIL metadata')
- base_url_el = smil_doc.find('./head/meta')
-            if base_url_el is not None:
-                base_url = base_url_el.attrib['base']
-            formats.extend([{
-                'format_id': 'rtmp',
-                'url': base_url if base_url_el is not None else n.attrib['src'],
- 'play_path': n.attrib['src'],
- 'ext': 'flv',
- 'preference': -100,
- 'format_note': 'Seems to fail at example stream',
- } for n in smil_doc.findall('./body/video')])
- else:
- formats.append({'url': smil_url})
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': asset['title'],
- 'thumbnail': asset.get('image'),
- 'description': asset.get('teaser'),
- 'duration': asset.get('duration'),
- 'categories': categories,
- 'view_count': asset.get('views'),
- 'rtmp_live': asset.get('live'),
- 'timestamp': parse_iso8601(asset.get('date')),
- }
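
The deleted extractor assumes the CDN exposes an HLS playlist alongside each SMIL manifest under the same path with only the extension swapped; that is a convention of this particular CDN rather than a general rule. A sketch of the derivation:

def smil_to_m3u8(smil_url):
    # Swap the manifest extension to get the HLS playlist; returns None
    # when the asset URL is not a SMIL manifest.
    if '.smil' not in smil_url:
        return None
    return smil_url.replace('.smil', '.m3u8')

assert smil_to_m3u8('http://cdn.example.com/event/stream.smil') == \
    'http://cdn.example.com/event/stream.m3u8'
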
diff --git a/youtube_dl/extractor/srmediathek.py b/youtube_dl/extractor/srmediathek.py
deleted file mode 100644
index 28baf901c..000000000
--- a/youtube_dl/extractor/srmediathek.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .ard import ARDMediathekIE
-from ..utils import (
- ExtractorError,
- get_element_by_attribute,
-)
-
-
-class SRMediathekIE(ARDMediathekIE):
- IE_NAME = 'sr:mediathek'
- IE_DESC = 'Saarländischer Rundfunk'
- _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
-
- _TESTS = [{
- 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
- 'info_dict': {
- 'id': '28455',
- 'ext': 'mp4',
- 'title': 'sportarena (26.10.2014)',
- 'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
- 'thumbnail': r're:^https?://.*\.jpg$',
- },
- 'skip': 'no longer available',
- }, {
- 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682',
- 'info_dict': {
- 'id': '37682',
- 'ext': 'mp4',
- 'title': 'Love, Cakes and Rock\'n\'Roll',
- 'description': 'md5:18bf9763631c7d326c22603681e1123d',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- if '>Der gew&uuml;nschte Beitrag ist leider nicht mehr verf&uuml;gbar.<' in webpage:
- raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
-
- media_collection_url = self._search_regex(
- r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url')
- info = self._extract_media_info(media_collection_url, webpage, video_id)
- info.update({
- 'id': video_id,
- 'title': get_element_by_attribute('class', 'ardplayer-title', webpage),
- 'description': self._og_search_description(webpage),
- 'thumbnail': self._og_search_thumbnail(webpage),
- })
- return info
diff --git a/youtube_dl/extractor/streamango.py b/youtube_dl/extractor/streamango.py
deleted file mode 100644
index f1e17dd88..000000000
--- a/youtube_dl/extractor/streamango.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_chr
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- js_to_json,
-)
-
-
-class StreamangoIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:streamango\.com|fruithosts\.net|streamcherry\.com)/(?:f|embed)/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://streamango.com/f/clapasobsptpkdfe/20170315_150006_mp4',
- 'md5': 'e992787515a182f55e38fc97588d802a',
- 'info_dict': {
- 'id': 'clapasobsptpkdfe',
- 'ext': 'mp4',
- 'title': '20170315_150006.mp4',
- }
- }, {
- # no og:title
- 'url': 'https://streamango.com/embed/foqebrpftarclpob/asdf_asd_2_mp4',
- 'info_dict': {
- 'id': 'foqebrpftarclpob',
- 'ext': 'mp4',
- 'title': 'foqebrpftarclpob',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'gone',
- }, {
- 'url': 'https://streamango.com/embed/clapasobsptpkdfe/20170315_150006_mp4',
- 'only_matching': True,
- }, {
- 'url': 'https://fruithosts.net/f/mreodparcdcmspsm/w1f1_r4lph_2018_brrs_720p_latino_mp4',
- 'only_matching': True,
- }, {
- 'url': 'https://streamcherry.com/f/clapasobsptpkdfe/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- def decrypt_src(encoded, val):
- ALPHABET = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
- encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
- decoded = ''
- sm = [None] * 4
- i = 0
- str_len = len(encoded)
- while i < str_len:
- for j in range(4):
- sm[j % 4] = ALPHABET.index(encoded[i])
- i += 1
- char_code = ((sm[0] << 0x2) | (sm[1] >> 0x4)) ^ val
- decoded += compat_chr(char_code)
- if sm[2] != 0x40:
- char_code = ((sm[1] & 0xf) << 0x4) | (sm[2] >> 0x2)
- decoded += compat_chr(char_code)
- if sm[3] != 0x40:
- char_code = ((sm[2] & 0x3) << 0x6) | sm[3]
- decoded += compat_chr(char_code)
- return decoded
-
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._og_search_title(webpage, default=video_id)
-
- formats = []
- for format_ in re.findall(r'({[^}]*\bsrc\s*:\s*[^}]*})', webpage):
- mobj = re.search(r'(src\s*:\s*[^(]+\(([^)]*)\)[\s,]*)', format_)
- if mobj is None:
- continue
-
- format_ = format_.replace(mobj.group(0), '')
-
- video = self._parse_json(
- format_, video_id, transform_source=js_to_json,
- fatal=False) or {}
-
- mobj = re.search(
- r'([\'"])(?P<src>(?:(?!\1).)+)\1\s*,\s*(?P<val>\d+)',
- mobj.group(1))
- if mobj is None:
- continue
-
- src = decrypt_src(mobj.group('src'), int_or_none(mobj.group('val')))
- if not src:
- continue
-
- ext = determine_ext(src, default_ext=None)
- if video.get('type') == 'application/dash+xml' or ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- src, video_id, mpd_id='dash', fatal=False))
- else:
- formats.append({
- 'url': src,
- 'ext': ext or 'mp4',
- 'width': int_or_none(video.get('width')),
- 'height': int_or_none(video.get('height')),
- 'tbr': int_or_none(video.get('bitrate')),
- })
-
- if not formats:
- error = self._search_regex(
- r'<p[^>]+\bclass=["\']lead[^>]+>(.+?)</p>', webpage,
- 'error', default=None)
- if not error and '>Sorry' in webpage:
- error = 'Video %s is not available' % video_id
- if error:
- raise ExtractorError(error, expected=True)
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'url': url,
- 'title': title,
- 'formats': formats,
- }
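
decrypt_src above is base64 over a reversed custom alphabet (with 'A', index 64, serving as the padding byte) followed by an XOR of the first byte of every decoded triple with the numeric key. Assuming that reading is right, the same transform can be expressed with the stdlib base64 module; the round-trip check below is self-consistent rather than taken from real site data:

import base64
import re

CUSTOM = '=/+9876543210zyxwvutsrqponmlkjihgfedcbaZYXWVUTSRQPONMLKJIHGFEDCBA'
STANDARD = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='

def decrypt_src(encoded, val):
    # Map the site's alphabet onto the standard base64 alphabet, decode,
    # then XOR the first byte of each decoded triple with the key.
    encoded = re.sub(r'[^A-Za-z0-9+/=]', '', encoded)
    data = base64.b64decode(encoded.translate(str.maketrans(CUSTOM, STANDARD)))
    return ''.join(
        chr(b ^ val if i % 3 == 0 else b) for i, b in enumerate(data))

# Round-trip check: encode 'video.mp4' with the site's alphabet and key 3.
plain, key = 'video.mp4', 3
b64 = base64.b64encode(
    bytes(ord(c) ^ key if i % 3 == 0 else ord(c) for i, c in enumerate(plain))
).decode()
assert decrypt_src(b64.translate(str.maketrans(STANDARD, CUSTOM)), key) == plain
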
diff --git a/youtube_dl/extractor/streamcloud.py b/youtube_dl/extractor/streamcloud.py
deleted file mode 100644
index b97bb4374..000000000
--- a/youtube_dl/extractor/streamcloud.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- urlencode_postdata,
-)
-
-
-class StreamcloudIE(InfoExtractor):
- IE_NAME = 'streamcloud.eu'
- _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
-
- _TESTS = [{
- 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
- 'md5': '6bea4c7fa5daaacc2a946b7146286686',
- 'info_dict': {
- 'id': 'skp9j99s4bpz',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video \'/\\ ä ↭',
- },
- 'skip': 'Only available from the EU'
- }, {
- 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- url = 'http://streamcloud.eu/%s' % video_id
-
- orig_webpage = self._download_webpage(url, video_id)
-
- if '>File Not Found<' in orig_webpage:
- raise ExtractorError(
- 'Video %s does not exist' % video_id, expected=True)
-
- fields = re.findall(r'''(?x)<input\s+
- type="(?:hidden|submit)"\s+
- name="([^"]+)"\s+
- (?:id="[^"]+"\s+)?
- value="([^"]*)"
- ''', orig_webpage)
-
- self._sleep(6, video_id)
-
- webpage = self._download_webpage(
- url, video_id, data=urlencode_postdata(fields), headers={
- b'Content-Type': b'application/x-www-form-urlencoded',
- })
-
- try:
- title = self._html_search_regex(
- r'<h1[^>]*>([^<]+)<', webpage, 'title')
- video_url = self._search_regex(
- r'file:\s*"([^"]+)"', webpage, 'video URL')
- except ExtractorError:
- message = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>',
- webpage, 'message', default=None, group='message')
- if message:
- raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
- raise
- thumbnail = self._search_regex(
- r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
-
- return {
- 'id': video_id,
- 'title': title,
- 'url': video_url,
- 'thumbnail': thumbnail,
- 'http_headers': {
- 'Referer': url,
- },
- }
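
The deleted extractor's core trick is re-submitting the countdown form: it harvests the hidden/submit inputs, sleeps out the enforced delay, then POSTs the fields back to the same URL. A standalone sketch with stdlib urllib in place of youtube-dl's helpers (the 6-second delay comes from the code above):

import re
import time
from urllib.parse import urlencode
from urllib.request import Request, urlopen

def resubmit_wait_form(url, html, delay=6):
    # Collect the hidden/submit inputs of the countdown form and POST
    # them back after the enforced wait, as the extractor above does.
    fields = re.findall(r'''(?x)<input\s+
        type="(?:hidden|submit)"\s+
        name="([^"]+)"\s+
        (?:id="[^"]+"\s+)?
        value="([^"]*)"
        ''', html)
    time.sleep(delay)
    req = Request(url, data=urlencode(fields).encode(),
                  headers={'Content-Type': 'application/x-www-form-urlencoded'})
    with urlopen(req) as resp:
        return resp.read().decode('utf-8', 'replace')
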
diff --git a/youtube_dl/extractor/stretchinternet.py b/youtube_dl/extractor/stretchinternet.py
deleted file mode 100644
index ae2ac1b42..000000000
--- a/youtube_dl/extractor/stretchinternet.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import int_or_none
-
-
-class StretchInternetIE(InfoExtractor):
- _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/portal\.htm\?.*?\beventId=(?P<id>\d+)'
- _TEST = {
- 'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=313900&streamType=video',
- 'info_dict': {
- 'id': '313900',
- 'ext': 'mp4',
- 'title': 'Augustana (S.D.) Baseball vs University of Mary',
- 'description': 'md5:7578478614aae3bdd4a90f578f787438',
- 'timestamp': 1490468400,
- 'upload_date': '20170325',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- stream = self._download_json(
- 'https://neo-client.stretchinternet.com/streamservice/v1/media/stream/v%s'
- % video_id, video_id)
-
- video_url = 'https://%s' % stream['source']
-
- event = self._download_json(
- 'https://neo-client.stretchinternet.com/portal-ws/getEvent.json',
- video_id, query={
- 'clientID': 99997,
- 'eventID': video_id,
- 'token': 'asdf',
- })['event']
-
- title = event.get('title') or event['mobileTitle']
- description = event.get('customText')
- timestamp = int_or_none(event.get('longtime'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'url': video_url,
- }
diff --git a/youtube_dl/extractor/stv.py b/youtube_dl/extractor/stv.py
deleted file mode 100644
index ccb074cd4..000000000
--- a/youtube_dl/extractor/stv.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_parse_qs,
- compat_urllib_parse_urlparse
-)
-from ..utils import (
- extract_attributes,
- float_or_none,
- int_or_none,
- str_or_none,
-)
-
-
-class STVPlayerIE(InfoExtractor):
- IE_NAME = 'stv:player'
- _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
- _TEST = {
- 'url': 'https://player.stv.tv/video/7srz/victoria/interview-with-the-cast-ahead-of-new-victoria/',
- 'md5': '2ad867d4afd641fa14187596e0fbc91b',
- 'info_dict': {
- 'id': '6016487034001',
- 'ext': 'mp4',
- 'upload_date': '20190321',
- 'title': 'Interview with the cast ahead of new Victoria',
- 'description': 'Nell Hudson and Lily Travers tell us what to expect in the new season of Victoria.',
- 'timestamp': 1553179628,
- 'uploader_id': '1486976045',
- },
- 'skip': 'this resource is unavailable outside of the UK',
- }
- _PUBLISHER_ID = '1486976045'
- _PTYPE_MAP = {
- 'episode': 'episodes',
- 'video': 'shortform',
- }
-
- def _real_extract(self, url):
- ptype, video_id = re.match(self._VALID_URL, url).groups()
- webpage = self._download_webpage(url, video_id)
-
- qs = compat_parse_qs(compat_urllib_parse_urlparse(self._search_regex(
- r'itemprop="embedURL"[^>]+href="([^"]+)',
-            webpage, 'embed URL', default='')).query)
- publisher_id = qs.get('publisherID', [None])[0] or self._PUBLISHER_ID
-
- player_attr = extract_attributes(self._search_regex(
- r'(<[^>]+class="bcplayer"[^>]+>)', webpage, 'player', default=None)) or {}
-
- info = {}
- duration = ref_id = series = video_id = None
- api_ref_id = player_attr.get('data-player-api-refid')
- if api_ref_id:
- resp = self._download_json(
- 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], api_ref_id),
- api_ref_id, fatal=False)
- if resp:
- result = resp.get('results') or {}
- video = result.get('video') or {}
- video_id = str_or_none(video.get('id'))
- ref_id = video.get('guid')
- duration = video.get('length')
- programme = result.get('programme') or {}
- series = programme.get('name') or programme.get('shortName')
- subtitles = {}
- _subtitles = result.get('_subtitles') or {}
- for ext, sub_url in _subtitles.items():
- subtitles.setdefault('en', []).append({
- 'ext': 'vtt' if ext == 'webvtt' else ext,
- 'url': sub_url,
- })
- info.update({
- 'description': result.get('summary'),
- 'subtitles': subtitles,
- 'view_count': int_or_none(result.get('views')),
- })
- if not video_id:
- video_id = qs.get('videoId', [None])[0] or self._search_regex(
- r'<link\s+itemprop="url"\s+href="(\d+)"',
- webpage, 'video id', default=None) or 'ref:' + (ref_id or player_attr['data-refid'])
-
- info.update({
- '_type': 'url_transparent',
- 'duration': float_or_none(duration or player_attr.get('data-duration'), 1000),
- 'id': video_id,
- 'ie_key': 'BrightcoveNew',
- 'series': series or player_attr.get('data-programme-name'),
- 'url': 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id),
- })
- return info
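
The publisher id resolution above prefers the publisherID query parameter of the schema.org embedURL and only falls back to STV's own Brightcove account id. A small sketch of that fallback using stdlib urllib.parse (the sample embed URL is illustrative):

from urllib.parse import parse_qs, urlparse

DEFAULT_PUBLISHER_ID = '1486976045'

def publisher_id_from_embed(embed_url):
    # Pull publisherID out of the Brightcove embed URL's query string,
    # falling back to STV's own account id when it is missing.
    qs = parse_qs(urlparse(embed_url or '').query)
    return qs.get('publisherID', [None])[0] or DEFAULT_PUBLISHER_ID

assert publisher_id_from_embed(
    'https://players.brightcove.net/x/index.html?publisherID=42') == '42'
assert publisher_id_from_embed(None) == DEFAULT_PUBLISHER_ID
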
diff --git a/youtube_dl/extractor/svt.py b/youtube_dl/extractor/svt.py
deleted file mode 100644
index 0901c3163..000000000
--- a/youtube_dl/extractor/svt.py
+++ /dev/null
@@ -1,371 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
-    compat_parse_qs,
-    compat_str,
-    compat_urllib_parse_urlparse,
-)
-from ..utils import (
-    determine_ext,
-    dict_get,
-    int_or_none,
-    orderedSet,
-    strip_or_none,
-    try_get,
-    urljoin,
-)
-
-
-class SVTBaseIE(InfoExtractor):
- _GEO_COUNTRIES = ['SE']
-
- def _extract_video(self, video_info, video_id):
- is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
- m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
- formats = []
- for vr in video_info['videoReferences']:
- player_type = vr.get('playerType') or vr.get('format')
- vurl = vr['url']
- ext = determine_ext(vurl)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- vurl, video_id,
- ext='mp4', entry_protocol=m3u8_protocol,
- m3u8_id=player_type, fatal=False))
- elif ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- vurl + '?hdcore=3.3.0', video_id,
- f4m_id=player_type, fatal=False))
- elif ext == 'mpd':
- if player_type == 'dashhbbtv':
- formats.extend(self._extract_mpd_formats(
- vurl, video_id, mpd_id=player_type, fatal=False))
- else:
- formats.append({
- 'format_id': player_type,
- 'url': vurl,
- })
- if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
- self.raise_geo_restricted(
- 'This video is only available in Sweden',
- countries=self._GEO_COUNTRIES)
- self._sort_formats(formats)
-
- subtitles = {}
- subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
- if isinstance(subtitle_references, list):
- for sr in subtitle_references:
- subtitle_url = sr.get('url')
- subtitle_lang = sr.get('language', 'sv')
- if subtitle_url:
- if determine_ext(subtitle_url) == 'm3u8':
- # TODO(yan12125): handle WebVTT in m3u8 manifests
- continue
-
- subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
-
- title = video_info.get('title')
-
- series = video_info.get('programTitle')
- season_number = int_or_none(video_info.get('season'))
- episode = video_info.get('episodeTitle')
- episode_number = int_or_none(video_info.get('episodeNumber'))
-
- duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
- age_limit = None
- adult = dict_get(
- video_info, ('inappropriateForChildren', 'blockedForChildren'),
- skip_false_values=False)
- if adult is not None:
- age_limit = 18 if adult else 0
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'subtitles': subtitles,
- 'duration': duration,
- 'age_limit': age_limit,
- 'series': series,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- 'is_live': is_live,
- }
-
-
-class SVTIE(SVTBaseIE):
- _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
- 'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
- 'info_dict': {
- 'id': '2900353',
- 'ext': 'mp4',
- 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
- 'duration': 27,
- 'age_limit': 0,
- },
- }
-
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
- if mobj:
- return mobj.group('url')
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- widget_id = mobj.group('widget_id')
- article_id = mobj.group('id')
-
- info = self._download_json(
- 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
- article_id)
-
- info_dict = self._extract_video(info['video'], article_id)
- info_dict['title'] = info['context']['title']
- return info_dict
-
-
-class SVTPlayBaseIE(SVTBaseIE):
- _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
-
-
-class SVTPlayIE(SVTPlayBaseIE):
- IE_DESC = 'SVT Play and Öppet arkiv'
- _VALID_URL = r'''(?x)
- (?:
- svt:(?P<svt_id>[^/?#&]+)|
- https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
- )
- '''
- _TESTS = [{
- 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
- 'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
- 'info_dict': {
- 'id': '5996901',
- 'ext': 'mp4',
- 'title': 'Flygplan till Haile Selassie',
- 'duration': 3527,
- 'thumbnail': r're:^https?://.*[\.-]jpg$',
- 'age_limit': 0,
- 'subtitles': {
- 'sv': [{
- 'ext': 'wsrt',
- }]
- },
- },
- }, {
- # geo restricted to Sweden
- 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
- 'only_matching': True,
- }, {
- 'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
- 'only_matching': True,
- }, {
- 'url': 'https://www.svtplay.se/kanaler/svt1',
- 'only_matching': True,
- }, {
- 'url': 'svt:1376446-003A',
- 'only_matching': True,
- }, {
- 'url': 'svt:14278044',
- 'only_matching': True,
- }]
-
- def _adjust_title(self, info):
- if info['is_live']:
- info['title'] = self._live_title(info['title'])
-
- def _extract_by_video_id(self, video_id, webpage=None):
- data = self._download_json(
- 'https://api.svt.se/videoplayer-api/video/%s' % video_id,
- video_id, headers=self.geo_verification_headers())
- info_dict = self._extract_video(data, video_id)
- if not info_dict.get('title'):
- title = dict_get(info_dict, ('episode', 'series'))
- if not title and webpage:
- title = re.sub(
- r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
- if not title:
- title = video_id
- info_dict['title'] = title
- self._adjust_title(info_dict)
- return info_dict
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id, svt_id = mobj.group('id', 'svt_id')
-
- if svt_id:
- return self._extract_by_video_id(svt_id)
-
- webpage = self._download_webpage(url, video_id)
-
- data = self._parse_json(
- self._search_regex(
- self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
- group='json'),
- video_id, fatal=False)
-
- thumbnail = self._og_search_thumbnail(webpage)
-
- if data:
- video_info = try_get(
- data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
- dict)
- if video_info:
- info_dict = self._extract_video(video_info, video_id)
- info_dict.update({
- 'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
- 'thumbnail': thumbnail,
- })
- self._adjust_title(info_dict)
- return info_dict
-
- svt_id = self._search_regex(
- r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
- webpage, 'video id')
-
- return self._extract_by_video_id(svt_id, webpage)
-
-
-class SVTSeriesIE(SVTPlayBaseIE):
- _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)'
- _TESTS = [{
- 'url': 'https://www.svtplay.se/rederiet',
- 'info_dict': {
- 'id': 'rederiet',
- 'title': 'Rederiet',
- 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
- },
- 'playlist_mincount': 318,
- }, {
- 'url': 'https://www.svtplay.se/rederiet?tab=sasong2',
- 'info_dict': {
- 'id': 'rederiet-sasong2',
- 'title': 'Rederiet - Säsong 2',
- 'description': 'md5:505d491a58f4fcf6eb418ecab947e69e',
- },
- 'playlist_count': 12,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
-
- def _real_extract(self, url):
- series_id = self._match_id(url)
-
- qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
- season_slug = qs.get('tab', [None])[0]
-
- if season_slug:
- series_id += '-%s' % season_slug
-
- webpage = self._download_webpage(
- url, series_id, 'Downloading series page')
-
- root = self._parse_json(
- self._search_regex(
- self._SVTPLAY_RE, webpage, 'content', group='json'),
- series_id)
-
- season_name = None
-
- entries = []
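- # Each accordion entry is one season; when a ?tab= slug was given, keep only the matching season.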
- for season in root['relatedVideoContent']['relatedVideosAccordion']:
- if not isinstance(season, dict):
- continue
- if season_slug:
- if season.get('slug') != season_slug:
- continue
- season_name = season.get('name')
- videos = season.get('videos')
- if not isinstance(videos, list):
- continue
- for video in videos:
- content_url = video.get('contentUrl')
- if not content_url or not isinstance(content_url, compat_str):
- continue
- entries.append(
- self.url_result(
- urljoin(url, content_url),
- ie=SVTPlayIE.ie_key(),
- video_title=video.get('title')
- ))
-
- metadata = root.get('metaData')
- if not isinstance(metadata, dict):
- metadata = {}
-
- title = metadata.get('title')
- season_name = season_name or season_slug
-
- if title and season_name:
- title = '%s - %s' % (title, season_name)
- elif season_slug:
- title = season_slug
-
- return self.playlist_result(
- entries, series_id, title, metadata.get('description'))
-
-
-class SVTPageIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/]+/)*(?P<id>[^/?&#]+)'
- _TESTS = [{
- 'url': 'https://www.svt.se/sport/oseedat/guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
- 'info_dict': {
- 'id': 'guide-sommartraningen-du-kan-gora-var-och-nar-du-vill',
- 'title': 'GUIDE: Sommarträning du kan göra var och när du vill',
- },
- 'playlist_count': 7,
- }, {
- 'url': 'https://www.svt.se/nyheter/inrikes/ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
- 'info_dict': {
- 'id': 'ebba-busch-thor-kd-har-delvis-ratt-om-no-go-zoner',
- 'title': 'Ebba Busch Thor har bara delvis rätt om ”no-go-zoner”',
- },
- 'playlist_count': 1,
- }, {
- # only programTitle
- 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
- 'info_dict': {
- 'id': '2900353',
- 'ext': 'mp4',
- 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
- 'duration': 27,
- 'age_limit': 0,
- },
- }, {
- 'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
- 'only_matching': True,
- }, {
- 'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- entries = [
- self.url_result(
- 'svt:%s' % video_id, ie=SVTPlayIE.ie_key(), video_id=video_id)
- for video_id in orderedSet(re.findall(
- r'data-video-id=["\'](\d+)', webpage))]
-
- title = strip_or_none(self._og_search_title(webpage, default=None))
-
- return self.playlist_result(entries, playlist_id, title)
diff --git a/youtube_dl/extractor/teachable.py b/youtube_dl/extractor/teachable.py
deleted file mode 100644
index 7d2e34b3b..000000000
--- a/youtube_dl/extractor/teachable.py
+++ /dev/null
@@ -1,266 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .wistia import WistiaIE
-from ..compat import compat_str
-from ..utils import (
- clean_html,
- ExtractorError,
- get_element_by_class,
- urlencode_postdata,
- urljoin,
-)
-
-
-class TeachableBaseIE(InfoExtractor):
- _NETRC_MACHINE = 'teachable'
- _URL_PREFIX = 'teachable:'
-
- _SITES = {
- # Only notable ones here
- 'upskillcourses.com': 'upskill',
- 'academy.gns3.com': 'gns3',
- 'academyhacker.com': 'academyhacker',
- 'stackskills.com': 'stackskills',
- 'market.saleshacker.com': 'saleshacker',
- 'learnability.org': 'learnability',
- 'edurila.com': 'edurila',
- 'courses.workitdaily.com': 'workitdaily',
- }
-
- _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
-
- def _real_initialize(self):
- self._logged_in = False
-
- def _login(self, site):
- if self._logged_in:
- return
-
- username, password = self._get_login_info(
- netrc_machine=self._SITES.get(site, site))
- if username is None:
- return
-
- login_page, urlh = self._download_webpage_handle(
- 'https://%s/sign_in' % site, None,
- 'Downloading %s login page' % site)
-
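- # Heuristics for an already authenticated session (a sign-out link or button is present).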
- def is_logged(webpage):
- return any(re.search(p, webpage) for p in (
- r'class=["\']user-signout',
- r'<a[^>]+\bhref=["\']/sign_out',
- r'Log\s+[Oo]ut\s*<'))
-
- if is_logged(login_page):
- self._logged_in = True
- return
-
- login_url = compat_str(urlh.geturl())
-
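- # Carry over all hidden form fields (e.g. the CSRF token) and add the credentials on top.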
- login_form = self._hidden_inputs(login_page)
-
- login_form.update({
- 'user[email]': username,
- 'user[password]': password,
- })
-
- post_url = self._search_regex(
- r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
- 'post url', default=login_url, group='url')
-
- if not post_url.startswith('http'):
- post_url = urljoin(login_url, post_url)
-
- response = self._download_webpage(
- post_url, None, 'Logging in to %s' % site,
- data=urlencode_postdata(login_form),
- headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'Referer': login_url,
- })
-
- if '>I accept the new Privacy Policy<' in response:
- raise ExtractorError(
- 'Unable to login: %s asks you to accept new Privacy Policy. '
- 'Go to https://%s/ and accept.' % (site, site), expected=True)
-
- # Successful login
- if is_logged(response):
- self._logged_in = True
- return
-
- message = get_element_by_class('alert', response)
- if message is not None:
- raise ExtractorError(
- 'Unable to login: %s' % clean_html(message), expected=True)
-
- raise ExtractorError('Unable to log in')
-
-
-class TeachableIE(TeachableBaseIE):
- _VALID_URL = r'''(?x)
- (?:
- %shttps?://(?P<site_t>[^/]+)|
- https?://(?:www\.)?(?P<site>%s)
- )
- /courses/[^/]+/lectures/(?P<id>\d+)
- ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
-
- _TESTS = [{
- 'url': 'http://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
- 'info_dict': {
- 'id': 'uzw6zw58or',
- 'ext': 'mp4',
- 'title': 'Welcome to the Course!',
- 'description': 'md5:65edb0affa582974de4625b9cdea1107',
- 'duration': 138.763,
- 'timestamp': 1479846621,
- 'upload_date': '20161122',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://upskillcourses.com/courses/119763/lectures/1747100',
- 'only_matching': True,
- }, {
- 'url': 'https://academy.gns3.com/courses/423415/lectures/6885939',
- 'only_matching': True,
- }, {
- 'url': 'teachable:https://upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _is_teachable(webpage):
- return 'teachableTracker.linker:autoLink' in webpage and re.search(
- r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
- webpage)
-
- @staticmethod
- def _extract_url(webpage, source_url):
- if not TeachableIE._is_teachable(webpage):
- return
- if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
- return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- site = mobj.group('site') or mobj.group('site_t')
- video_id = mobj.group('id')
-
- self._login(site)
-
- prefixed = url.startswith(self._URL_PREFIX)
- if prefixed:
- url = url[len(self._URL_PREFIX):]
-
- webpage = self._download_webpage(url, video_id)
-
- wistia_url = WistiaIE._extract_url(webpage)
- if not wistia_url:
- if any(re.search(p, webpage) for p in (
- r'class=["\']lecture-contents-locked',
- r'>\s*Lecture contents locked',
- r'id=["\']lecture-locked')):
- self.raise_login_required('Lecture contents locked')
- # Neither a Wistia embed nor a lock marker: bail out explicitly.
- raise ExtractorError('Unable to find video URL')
-
- title = self._og_search_title(webpage, default=None)
-
- return {
- '_type': 'url_transparent',
- 'url': wistia_url,
- 'ie_key': WistiaIE.ie_key(),
- 'title': title,
- }
-
-
-class TeachableCourseIE(TeachableBaseIE):
- _VALID_URL = r'''(?x)
- (?:
- %shttps?://(?P<site_t>[^/]+)|
- https?://(?:www\.)?(?P<site>%s)
- )
- /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
- ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
- _TESTS = [{
- 'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
- 'info_dict': {
- 'id': 'essential-web-developer-course',
- 'title': 'The Essential Web Developer Course (Free)',
- },
- 'playlist_count': 192,
- }, {
- 'url': 'http://upskillcourses.com/courses/119763/',
- 'only_matching': True,
- }, {
- 'url': 'http://upskillcourses.com/courses/enrolled/119763',
- 'only_matching': True,
- }, {
- 'url': 'https://academy.gns3.com/courses/enrolled/423415',
- 'only_matching': True,
- }, {
- 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
- 'only_matching': True,
- }, {
- 'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if TeachableIE.suitable(url) else super(
- TeachableCourseIE, cls).suitable(url)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- site = mobj.group('site') or mobj.group('site_t')
- course_id = mobj.group('id')
-
- self._login(site)
-
- prefixed = url.startswith(self._URL_PREFIX)
- if prefixed:
- url = url[len(self._URL_PREFIX):]
-
- webpage = self._download_webpage(url, course_id)
-
- url_base = 'https://%s/' % site
-
- entries = []
-
- for mobj in re.finditer(
- r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
- webpage):
- li = mobj.group('li')
- if 'fa-youtube-play' not in li:
- continue
- lecture_url = self._search_regex(
- r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
- 'lecture url', default=None, group='url')
- if not lecture_url:
- continue
- lecture_id = self._search_regex(
- r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
- title = self._html_search_regex(
- r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
- 'title', default=None)
- entry_url = urljoin(url_base, lecture_url)
- if prefixed:
- entry_url = self._URL_PREFIX + entry_url
- entries.append(
- self.url_result(
- entry_url,
- ie=TeachableIE.ie_key(), video_id=lecture_id,
- video_title=clean_html(title)))
-
- course_title = self._html_search_regex(
- (r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
- r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h'),
- webpage, 'course title', fatal=False)
-
- return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/teachingchannel.py b/youtube_dl/extractor/teachingchannel.py
deleted file mode 100644
index e89759714..000000000
--- a/youtube_dl/extractor/teachingchannel.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .ooyala import OoyalaIE
-
-
-class TeachingChannelIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos/(?P<title>.+)'
-
- _TEST = {
- 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
- 'md5': '3d6361864d7cac20b57c8784da17166f',
- 'info_dict': {
- 'id': 'F3bnlzbToeI6pLEfRyrlfooIILUjz4nM',
- 'ext': 'mp4',
- 'title': 'A History of Teaming',
- 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
- 'duration': 422.255,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Ooyala'],
- }
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- title = mobj.group('title')
- webpage = self._download_webpage(url, title)
- ooyala_code = self._search_regex(
- r'data-embed-code=\'(.+?)\'', webpage, 'ooyala code')
-
- return OoyalaIE._build_url_result(ooyala_code)
diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py
deleted file mode 100644
index 7640cf00a..000000000
--- a/youtube_dl/extractor/teamcoco.py
+++ /dev/null
@@ -1,199 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-
-from .turner import TurnerBaseIE
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- mimetype2ext,
- parse_duration,
- parse_iso8601,
- qualities,
-)
-
-
-class TeamcocoIE(TurnerBaseIE):
- _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
- _TESTS = [
- {
- 'url': 'http://teamcoco.com/video/mary-kay-remote',
- 'md5': '55d532f81992f5c92046ad02fec34d7d',
- 'info_dict': {
- 'id': '80187',
- 'ext': 'mp4',
- 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
- 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
- 'duration': 495.0,
- 'upload_date': '20140402',
- 'timestamp': 1396407600,
- }
- }, {
- 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
- 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
- 'info_dict': {
- 'id': '19705',
- 'ext': 'mp4',
- 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
- 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
- 'duration': 288,
- 'upload_date': '20111104',
- 'timestamp': 1320405840,
- }
- }, {
- 'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
- 'info_dict': {
- 'id': '88748',
- 'ext': 'mp4',
- 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
- 'description': 'md5:15501f23f020e793aeca761205e42c24',
- 'upload_date': '20150415',
- 'timestamp': 1429088400,
- },
- 'params': {
- 'skip_download': True, # m3u8 downloads
- }
- }, {
- 'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
- 'info_dict': {
- 'id': '89341',
- 'ext': 'mp4',
- 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
- 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
- },
- 'params': {
- 'skip_download': True, # m3u8 downloads
- },
- 'skip': 'This video is no longer available.',
- }, {
- 'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
- 'only_matching': True,
- }, {
- 'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
- 'only_matching': True,
- }, {
- 'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
- 'only_matching': True,
- }, {
- 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
- 'only_matching': True,
- }, {
- 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
- 'only_matching': True,
- }
- ]
-
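- # Interpolates a find<ObjectType> GraphQL query and unwraps the matching object from the response.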
- def _graphql_call(self, query_template, object_type, object_id):
- find_object = 'find' + object_type
- return self._download_json(
- 'https://teamcoco.com/graphql', object_id, data=json.dumps({
- 'query': query_template % (find_object, object_id)
- }).encode(), headers={
- 'Content-Type': 'application/json',
- })['data'][find_object]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- response = self._graphql_call('''{
- %s(slug: "%s") {
- ... on RecordSlug {
- record {
- id
- title
- teaser
- publishOn
- thumb {
- preview
- }
- file {
- url
- }
- tags {
- name
- }
- duration
- turnerMediaId
- turnerMediaAuthToken
- }
- }
- ... on NotFoundSlug {
- status
- }
- }
-}''', 'Slug', display_id)
- if response.get('status'):
- raise ExtractorError('This video is no longer available.', expected=True)
-
- record = response['record']
- video_id = record['id']
-
- info = {
- 'id': video_id,
- 'display_id': display_id,
- 'title': record['title'],
- 'thumbnail': record.get('thumb', {}).get('preview'),
- 'description': record.get('teaser'),
- 'duration': parse_duration(record.get('duration')),
- 'timestamp': parse_iso8601(record.get('publishOn')),
- }
-
- media_id = record.get('turnerMediaId')
- if media_id:
- self._initialize_geo_bypass({
- 'countries': ['US'],
- })
- info.update(self._extract_ngtv_info(media_id, {
- 'accessToken': record['turnerMediaAuthToken'],
- 'accessTokenType': 'jws',
- }))
- else:
- d = self._download_json(
- 'https://teamcoco.com/_truman/d/' + video_id,
- video_id, fatal=False) or {}
- video_sources = d.get('meta') or {}
- if not video_sources:
- video_sources = self._graphql_call('''{
- %s(id: "%s") {
- src
- }
-}''', 'RecordVideoSource', video_id) or {}
-
- formats = []
- get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
- for format_id, src in video_sources.get('src', {}).items():
- if not isinstance(src, dict):
- continue
- src_url = src.get('src')
- if not src_url:
- continue
- ext = determine_ext(src_url, mimetype2ext(src.get('type')))
- if format_id == 'hls' or ext == 'm3u8':
- # compat_urlparse.urljoin would resolve the leading slash against the wrong base; prefix the Turner CDN path instead
- if src_url.startswith('/'):
- src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
- formats.extend(self._extract_m3u8_formats(
- src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
- else:
- if src_url.startswith('/mp4:protected/'):
- # TODO Correct extraction for these files
- continue
- tbr = int_or_none(self._search_regex(
- r'(\d+)k\.mp4', src_url, 'tbr', default=None))
-
- formats.append({
- 'url': src_url,
- 'ext': ext,
- 'tbr': tbr,
- 'format_id': format_id,
- 'quality': get_quality(format_id),
- })
- if not formats:
- formats = self._extract_m3u8_formats(
- record['file']['url'], video_id, 'mp4', fatal=False)
- self._sort_formats(formats)
- info['formats'] = formats
-
- return info
diff --git a/youtube_dl/extractor/tele5.py b/youtube_dl/extractor/tele5.py
deleted file mode 100644
index 33a72083b..000000000
--- a/youtube_dl/extractor/tele5.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from .nexx import NexxIE
-from ..compat import compat_urlparse
-
-
-class Tele5IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
- 'info_dict': {
- 'id': '1549416',
- 'ext': 'mp4',
- 'upload_date': '20180814',
- 'timestamp': 1534290623,
- 'title': 'Pandorum',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tele5.de/anders-ist-sevda/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
-
- if not video_id:
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_id = self._html_search_regex(
- (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](\d+)',
- r'\s+id\s*=\s*["\']player_(\d{6,})',
- r'\bdata-id\s*=\s*["\'](\d{6,})'), webpage, 'video id')
-
- return self.url_result(
- 'https://api.nexx.cloud/v3/759/videos/byid/%s' % video_id,
- ie=NexxIE.ie_key(), video_id=video_id)
diff --git a/youtube_dl/extractor/telecinco.py b/youtube_dl/extractor/telecinco.py
deleted file mode 100644
index d37e1b055..000000000
--- a/youtube_dl/extractor/telecinco.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import re
-
-from .common import InfoExtractor
-from .ooyala import OoyalaIE
-from ..utils import (
- clean_html,
- determine_ext,
- int_or_none,
- str_or_none,
- urljoin,
-)
-
-
-class TelecincoIE(InfoExtractor):
- IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
- _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
-
- _TESTS = [{
- 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
- 'info_dict': {
- 'id': '1876350223',
- 'title': 'Bacalao con kokotxas al pil-pil',
- 'description': 'md5:1382dacd32dd4592d478cbdca458e5bb',
- },
- 'playlist': [{
- 'md5': 'adb28c37238b675dad0f042292f209a7',
- 'info_dict': {
- 'id': 'JEA5ijCnF6p5W08A1rNKn7',
- 'ext': 'mp4',
- 'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
- 'duration': 662,
- },
- }]
- }, {
- 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
- 'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
- 'info_dict': {
- 'id': 'jn24Od1zGLG4XUZcnUnZB6',
- 'ext': 'mp4',
- 'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?',
- 'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
- 'duration': 79,
- },
- }, {
- 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
- 'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
- 'info_dict': {
- 'id': 'aywerkD2Sv1vGNqq9b85Q2',
- 'ext': 'mp4',
- 'title': '#DOYLACARA. Con la trata no hay trato',
- 'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
- 'duration': 50,
- },
- }, {
- 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
- 'only_matching': True,
- }, {
- 'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
- 'only_matching': True,
- }, {
- # ooyala video
- 'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
- 'only_matching': True,
- }]
-
- def _parse_content(self, content, url):
- video_id = content['dataMediaId']
- if content.get('dataCmsId') == 'ooyala':
- return self.url_result(
- 'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
- config_url = urljoin(url, content['dataConfig'])
- config = self._download_json(
- config_url, video_id, 'Downloading config JSON')
- title = config['info']['title']
-
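- # The config references either a flash or an html5 MMC JSON; rewrite the suffix so both variants can be fetched.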
- def mmc_url(mmc_type):
- return re.sub(
- r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
- config['services']['mmc'])
-
- duration = None
- formats = []
- for mmc_type in ('flash', 'html5'):
- mmc = self._download_json(
- mmc_url(mmc_type), video_id,
- 'Downloading %s mmc JSON' % mmc_type, fatal=False)
- if not mmc:
- continue
- if not duration:
- duration = int_or_none(mmc.get('duration'))
- for location in mmc['locations']:
- gat = self._proto_relative_url(location.get('gat'), 'http:')
- gcp = location.get('gcp')
- ogn = location.get('ogn')
- if None in (gat, gcp, ogn):
- continue
- token_data = {
- 'gcp': gcp,
- 'ogn': ogn,
- 'sta': 0,
- }
- media = self._download_json(
- gat, video_id, data=json.dumps(token_data).encode('utf-8'),
- headers={
- 'Content-Type': 'application/json;charset=utf-8',
- 'Referer': url,
- }, fatal=False) or {}
- stream = media.get('stream') or media.get('file')
- if not stream:
- continue
- ext = determine_ext(stream)
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
- video_id, f4m_id='hds', fatal=False))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- stream, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- 'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
- 'duration': duration,
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- article = self._parse_json(self._search_regex(
- r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})',
- webpage, 'article'), display_id)['article']
- title = article.get('title')
- description = clean_html(article.get('leadParagraph'))
- if article.get('editorialType') != 'VID':
- entries = []
- for p in article.get('body', []):
- content = p.get('content')
- if p.get('type') != 'video' or not content:
- continue
- entries.append(self._parse_content(content, url))
- return self.playlist_result(
- entries, str_or_none(article.get('id')), title, description)
- content = article['opening']['content']
- info = self._parse_content(content, url)
- info.update({
- 'description': description,
- })
- return info
diff --git a/youtube_dl/extractor/telegraaf.py b/youtube_dl/extractor/telegraaf.py
deleted file mode 100644
index 0f576c1ab..000000000
--- a/youtube_dl/extractor/telegraaf.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- remove_end,
-)
-
-
-class TelegraafIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/tv/(?:[^/]+/)+(?P<id>\d+)/[^/]+\.html'
- _TEST = {
- 'url': 'http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html',
- 'info_dict': {
- 'id': '24353229',
- 'ext': 'mp4',
- 'title': 'Tikibad ontruimd wegens brand',
- 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 33,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- player_url = self._html_search_regex(
- r'<iframe[^>]+src="([^"]+)"', webpage, 'player URL')
- player_page = self._download_webpage(
- player_url, video_id, note='Downloading player webpage')
- playlist_url = self._search_regex(
- r'playlist\s*:\s*"([^"]+)"', player_page, 'playlist URL')
- playlist_data = self._download_json(playlist_url, video_id)
-
- item = playlist_data['items'][0]
- formats = []
- locations = item['locations']
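- # 'adaptive' locations carry HLS/DASH manifests; 'progressive' locations are direct HTTP downloads.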
- for location in locations.get('adaptive', []):
- manifest_url = location['src']
- ext = determine_ext(manifest_url)
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
- elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- manifest_url, video_id, mpd_id='dash', fatal=False))
- else:
- self.report_warning('Unknown adaptive format %s' % ext)
- for location in locations.get('progressive', []):
- formats.append({
- 'url': location['sources'][0]['src'],
- 'width': location.get('width'),
- 'height': location.get('height'),
- 'format_id': 'http-%s' % location['label'],
- })
-
- self._sort_formats(formats)
-
- title = remove_end(self._og_search_title(webpage), ' - VIDEO')
- description = self._og_search_description(webpage)
- duration = item.get('duration')
- thumbnail = item.get('poster')
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'formats': formats,
- 'duration': duration,
- 'thumbnail': thumbnail,
- }
diff --git a/youtube_dl/extractor/telequebec.py b/youtube_dl/extractor/telequebec.py
deleted file mode 100644
index ae9f66787..000000000
--- a/youtube_dl/extractor/telequebec.py
+++ /dev/null
@@ -1,207 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- smuggle_url,
- try_get,
- unified_timestamp,
-)
-
-
-class TeleQuebecBaseIE(InfoExtractor):
- @staticmethod
- def _limelight_result(media_id):
- return {
- '_type': 'url_transparent',
- 'url': smuggle_url(
- 'limelight:media:' + media_id, {'geo_countries': ['CA']}),
- 'ie_key': 'LimelightMedia',
- }
-
-
-class TeleQuebecIE(TeleQuebecBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:
- zonevideo\.telequebec\.tv/media|
- coucou\.telequebec\.tv/videos
- )/(?P<id>\d+)
- '''
- _TESTS = [{
- # available till 01.01.2023
- 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
- 'info_dict': {
- 'id': '577116881b4b439084e6b1cf4ef8b1b3',
- 'ext': 'mp4',
- 'title': 'Un petit choc et puis repart!',
- 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
- 'upload_date': '20180222',
- 'timestamp': 1519326631,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # no description
- 'url': 'http://zonevideo.telequebec.tv/media/30261',
- 'only_matching': True,
- }, {
- 'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- media_id = self._match_id(url)
-
- media_data = self._download_json(
- 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
- media_id)['media']
-
- info = self._limelight_result(media_data['streamInfo']['sourceId'])
- info.update({
- 'title': media_data.get('title'),
- 'description': try_get(
- media_data, lambda x: x['descriptions'][0]['text'], compat_str),
- 'duration': int_or_none(
- media_data.get('durationInMilliseconds'), 1000),
- })
- return info
-
-
-class TeleQuebecSquatIE(InfoExtractor):
- _VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://squat.telequebec.tv/videos/9314',
- 'info_dict': {
- 'id': 'd59ae78112d542e793d83cc9d3a5b530',
- 'ext': 'mp4',
- 'title': 'Poupeflekta',
- 'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75',
- 'duration': 1351,
- 'timestamp': 1569057600,
- 'upload_date': '20190921',
- 'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir',
- 'season': 'Saison 3',
- 'season_number': 3,
- 'episode_number': 57,
- },
- 'params': {
- 'skip_download': True,
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video = self._download_json(
- 'https://squat.api.telequebec.tv/v1/videos/%s' % video_id,
- video_id)
-
- media_id = video['sourceId']
-
- return {
- '_type': 'url_transparent',
- 'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id,
- 'ie_key': TeleQuebecIE.ie_key(),
- 'id': media_id,
- 'title': video.get('titre'),
- 'description': video.get('description'),
- 'timestamp': unified_timestamp(video.get('datePublication')),
- 'series': video.get('container'),
- 'season': video.get('saison'),
- 'season_number': int_or_none(video.get('noSaison')),
- 'episode_number': int_or_none(video.get('episode')),
- }
-
-
-class TeleQuebecEmissionIE(TeleQuebecBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:
- [^/]+\.telequebec\.tv/emissions/|
- (?:www\.)?telequebec\.tv/
- )
- (?P<id>[^?#&]+)
- '''
- _TESTS = [{
- 'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
- 'info_dict': {
- 'id': '66648a6aef914fe3badda25e81a4d50a',
- 'ext': 'mp4',
- 'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
- 'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
- 'upload_date': '20171024',
- 'timestamp': 1508862118,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
- 'only_matching': True,
- }, {
- 'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078',
- 'only_matching': True,
- }, {
- 'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- media_id = self._search_regex(
- r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
- 'limelight id')
-
- info = self._limelight_result(media_id)
- info.update({
- 'title': self._og_search_title(webpage, default=None),
- 'description': self._og_search_description(webpage, default=None),
- })
- return info
-
-
-class TeleQuebecLiveIE(InfoExtractor):
- _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
- _TEST = {
- 'url': 'http://zonevideo.telequebec.tv/endirect/',
- 'info_dict': {
- 'id': 'endirect',
- 'ext': 'mp4',
- 'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'is_live': True,
- },
- 'params': {
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
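- # Try to scrape the live m3u8 URL from the player script, falling back to a known static manifest.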
- m3u8_url = None
- webpage = self._download_webpage(
- 'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
- fatal=False)
- if webpage:
- m3u8_url = self._search_regex(
- r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'm3u8 url', default=None, group='url')
- if not m3u8_url:
- m3u8_url = 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', m3u8_id='hls')
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': self._live_title('Télé-Québec - En direct'),
- 'is_live': True,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/tfo.py b/youtube_dl/extractor/tfo.py
deleted file mode 100644
index 0e2370cd8..000000000
--- a/youtube_dl/extractor/tfo.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-
-from .common import InfoExtractor
-from ..utils import (
- HEADRequest,
- ExtractorError,
- int_or_none,
- clean_html,
-)
-
-
-class TFOIE(InfoExtractor):
- _GEO_COUNTRIES = ['CA']
- _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
- 'md5': '47c987d0515561114cf03d1226a9d4c7',
- 'info_dict': {
- 'id': '100463871',
- 'ext': 'mp4',
- 'title': 'Video Game Hackathon',
- 'description': 'md5:558afeba217c6c8d96c60e5421795c07',
- 'upload_date': '20160212',
- 'timestamp': 1455310233,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
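- # A preliminary request to the homepage obtains the tfo-session cookie that the info API requires.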
- self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id)
- infos = self._download_json(
- 'http://www.tfo.org/api/web/video/get_infos', video_id, data=json.dumps({
- 'product_id': video_id,
- }).encode(), headers={
- 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value,
- })
- if infos.get('success') == 0:
- if infos.get('code') == 'ErrGeoBlocked':
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
- raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(infos['msg'])), expected=True)
- video_data = infos['data']
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'url': 'limelight:media:' + video_data['llid'],
- 'title': video_data['title'],
- 'description': video_data.get('description'),
- 'series': video_data.get('collection'),
- 'season_number': int_or_none(video_data.get('season')),
- 'episode_number': int_or_none(video_data.get('episode')),
- 'duration': int_or_none(video_data.get('duration')),
- 'ie_key': 'LimelightMedia',
- }
diff --git a/youtube_dl/extractor/thesun.py b/youtube_dl/extractor/thesun.py
deleted file mode 100644
index 22d003776..000000000
--- a/youtube_dl/extractor/thesun.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from .ooyala import OoyalaIE
-
-
-class TheSunIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
- _TEST = {
- 'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
- 'info_dict': {
- 'id': '2261604',
- 'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
- },
- 'playlist_count': 2,
- }
-
- def _real_extract(self, url):
- article_id = self._match_id(url)
-
- webpage = self._download_webpage(url, article_id)
-
- entries = []
- for ooyala_id in re.findall(
- r'<[^>]+\b(?:id\s*=\s*"thesun-ooyala-player-|data-content-id\s*=\s*")([^"]+)',
- webpage):
- entries.append(OoyalaIE._build_url_result(ooyala_id))
-
- return self.playlist_result(
- entries, article_id, self._og_search_title(webpage, fatal=False))
diff --git a/youtube_dl/extractor/thisoldhouse.py b/youtube_dl/extractor/thisoldhouse.py
deleted file mode 100644
index 6ab147ad7..000000000
--- a/youtube_dl/extractor/thisoldhouse.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import try_get
-
-
-class ThisOldHouseIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode)/(?P<id>[^/?#]+)'
- _TESTS = [{
- 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
- 'md5': '568acf9ca25a639f0c4ff905826b662f',
- 'info_dict': {
- 'id': '2REGtUDQ',
- 'ext': 'mp4',
- 'title': 'How to Build a Storage Bench',
- 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
- 'timestamp': 1442548800,
- 'upload_date': '20150918',
- }
- }, {
- 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
- 'only_matching': True,
- }, {
- 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
- video_id = self._search_regex(
- (r'data-mid=(["\'])(?P<id>(?:(?!\1).)+)\1',
- r'id=(["\'])inline-video-player-(?P<id>(?:(?!\1).)+)\1'),
- webpage, 'video id', default=None, group='id')
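- # Older pages only expose the id via Drupal settings (jwplatform video_id or the comScore key).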
- if not video_id:
- drupal_settings = self._parse_json(self._search_regex(
- r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
- webpage, 'drupal settings'), display_id)
- video_id = try_get(
- drupal_settings, lambda x: x['jwplatform']['video_id'],
- compat_str) or list(drupal_settings['comScore'])[0]
- return self.url_result('jwplatform:' + video_id, 'JWPlatform', video_id)
diff --git a/youtube_dl/extractor/toggle.py b/youtube_dl/extractor/toggle.py
deleted file mode 100644
index 5e5efda0f..000000000
--- a/youtube_dl/extractor/toggle.py
+++ /dev/null
@@ -1,210 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import json
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- ExtractorError,
- float_or_none,
- int_or_none,
- parse_iso8601,
- sanitized_Request,
-)
-
-
-class ToggleIE(InfoExtractor):
- IE_NAME = 'toggle'
- _VALID_URL = r'https?://video\.toggle\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://video.toggle.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
- 'info_dict': {
- 'id': '343115',
- 'ext': 'mp4',
- 'title': 'Lion Moms Premiere',
- 'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b',
- 'upload_date': '20150910',
- 'timestamp': 1441858274,
- },
- 'params': {
- 'skip_download': 'm3u8 download',
- }
- }, {
- 'note': 'DRM-protected video',
- 'url': 'http://video.toggle.sg/en/movies/dug-s-special-mission/341413',
- 'info_dict': {
- 'id': '341413',
- 'ext': 'wvm',
- 'title': 'Dug\'s Special Mission',
- 'description': 'md5:e86c6f4458214905c1772398fabc93e0',
- 'upload_date': '20150827',
- 'timestamp': 1440644006,
- },
- 'params': {
- 'skip_download': 'DRM-protected wvm download',
- }
- }, {
- # this also tests correct video id extraction
- 'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
- 'url': 'http://video.toggle.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
- 'info_dict': {
- 'id': '332861',
- 'ext': 'mp4',
- 'title': '28th SEA Games (5 Show) - Episode 11',
- 'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
- 'upload_date': '20150605',
- 'timestamp': 1433480166,
- },
- 'params': {
- 'skip_download': 'm3u8 download',
- },
- 'skip': 'm3u8 links are geo-restricted'
- }, {
- 'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
- 'only_matching': True,
- }, {
- 'url': 'http://video.toggle.sg/zh/series/zero-calling-s2-hd/ep13/336367',
- 'only_matching': True,
- }, {
- 'url': 'http://video.toggle.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
- 'only_matching': True,
- }, {
- 'url': 'http://video.toggle.sg/en/movies/seven-days/321936',
- 'only_matching': True,
- }, {
- 'url': 'https://video.toggle.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
- 'only_matching': True,
- }, {
- 'url': 'http://video.toggle.sg/en/channels/eleven-plus/401585',
- 'only_matching': True,
- }]
-
- _FORMAT_PREFERENCES = {
- 'wvm-STBMain': -10,
- 'wvm-iPadMain': -20,
- 'wvm-iPhoneMain': -30,
- 'wvm-Android': -40,
- }
- _API_USER = 'tvpapi_147'
- _API_PASS = '11111'
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- url, video_id, note='Downloading video page')
-
- api_user = self._search_regex(
- r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
- default=self._API_USER, group='user')
- api_pass = self._search_regex(
- r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
- default=self._API_PASS, group='pass')
-
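- # The tvinci JSON gateway expects this initObj envelope; empty locale/device fields suffice for anonymous access.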
- params = {
- 'initObj': {
- 'Locale': {
- 'LocaleLanguage': '',
- 'LocaleCountry': '',
- 'LocaleDevice': '',
- 'LocaleUserState': 0
- },
- 'Platform': 0,
- 'SiteGuid': 0,
- 'DomainID': '0',
- 'UDID': '',
- 'ApiUser': api_user,
- 'ApiPass': api_pass
- },
- 'MediaID': video_id,
- 'mediaType': 0,
- }
-
- req = sanitized_Request(
- 'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
- json.dumps(params).encode('utf-8'))
- info = self._download_json(req, video_id, 'Downloading video info json')
-
- title = info['MediaName']
-
- formats = []
- for video_file in info.get('Files', []):
- video_url, vid_format = video_file.get('URL'), video_file.get('Format')
- if not video_url or video_url == 'NA' or not vid_format:
- continue
- ext = determine_ext(video_url)
- vid_format = vid_format.replace(' ', '')
- # if geo-restricted, m3u8 is inaccessible, but mp4 is okay
- if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, ext='mp4', m3u8_id=vid_format,
- note='Downloading %s m3u8 information' % vid_format,
- errnote='Failed to download %s m3u8 information' % vid_format,
- fatal=False))
- elif ext == 'mpd':
- formats.extend(self._extract_mpd_formats(
- video_url, video_id, mpd_id=vid_format,
- note='Downloading %s MPD manifest' % vid_format,
- errnote='Failed to download %s MPD manifest' % vid_format,
- fatal=False))
- elif ext == 'ism':
- formats.extend(self._extract_ism_formats(
- video_url, video_id, ism_id=vid_format,
- note='Downloading %s ISM manifest' % vid_format,
- errnote='Failed to download %s ISM manifest' % vid_format,
- fatal=False))
- elif ext in ('mp4', 'wvm'):
- # wvm are drm-protected files
- formats.append({
- 'ext': ext,
- 'url': video_url,
- 'format_id': vid_format,
- 'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
- 'format_note': 'DRM-protected video' if ext == 'wvm' else None
- })
- if not formats:
- # Most likely because the videos are geo-blocked
- raise ExtractorError('No downloadable videos found', expected=True)
- self._sort_formats(formats)
-
- duration = int_or_none(info.get('Duration'))
- description = info.get('Description')
- created_at = parse_iso8601(info.get('CreationDate') or None)
-
- average_rating = float_or_none(info.get('Rating'))
- view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
- like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
-
- thumbnails = []
- for picture in info.get('Pictures', []):
- if not isinstance(picture, dict):
- continue
- pic_url = picture.get('URL')
- if not pic_url:
- continue
- thumbnail = {
- 'url': pic_url,
- }
- pic_size = picture.get('PicSize', '')
- m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', pic_size)
- if m:
- thumbnail.update({
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
- thumbnails.append(thumbnail)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': created_at,
- 'average_rating': average_rating,
- 'view_count': view_count,
- 'like_count': like_count,
- 'thumbnails': thumbnails,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/trunews.py b/youtube_dl/extractor/trunews.py
deleted file mode 100644
index b0c7caabf..000000000
--- a/youtube_dl/extractor/trunews.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- dict_get,
- float_or_none,
- int_or_none,
- unified_timestamp,
- update_url_query,
- url_or_none,
-)
-
-
-class TruNewsIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
- _TEST = {
- 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
- 'md5': 'a19c024c3906ff954fac9b96ce66bb08',
- 'info_dict': {
- 'id': '5c5a21e65d3c196e1c0020cc',
- 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
- 'ext': 'mp4',
- 'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
- 'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
- 'duration': 3685,
- 'timestamp': 1549411440,
- 'upload_date': '20190206',
- },
- 'add_ie': ['Zype'],
- }
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
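- # Resolve the friendly title slug to a Zype video object through the public Zype API.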
- video = self._download_json(
- 'https://api.zype.com/videos', display_id, query={
- 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
- 'per_page': 1,
- 'active': 'true',
- 'friendly_title': display_id,
- })['response'][0]
-
- zype_id = video['_id']
-
- thumbnails = []
- thumbnails_list = video.get('thumbnails')
- if isinstance(thumbnails_list, list):
- for thumbnail in thumbnails_list:
- if not isinstance(thumbnail, dict):
- continue
- thumbnail_url = url_or_none(thumbnail.get('url'))
- if not thumbnail_url:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'width': int_or_none(thumbnail.get('width')),
- 'height': int_or_none(thumbnail.get('height')),
- })
-
- return {
- '_type': 'url_transparent',
- 'url': update_url_query(
- 'https://player.zype.com/embed/%s.js' % zype_id,
- {'api_key': 'X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'}),
- 'ie_key': 'Zype',
- 'id': zype_id,
- 'display_id': display_id,
- 'title': video.get('title'),
- 'description': dict_get(video, ('description', 'ott_description', 'short_description')),
- 'duration': int_or_none(video.get('duration')),
- 'timestamp': unified_timestamp(video.get('published_at')),
- 'average_rating': float_or_none(video.get('rating')),
- 'view_count': int_or_none(video.get('request_count')),
- 'thumbnails': thumbnails,
- }
diff --git a/youtube_dl/extractor/tumblr.py b/youtube_dl/extractor/tumblr.py
deleted file mode 100644
index edbb0aa69..000000000
--- a/youtube_dl/extractor/tumblr.py
+++ /dev/null
@@ -1,214 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- int_or_none,
- urlencode_postdata
-)
-
-
-class TumblrIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
- _NETRC_MACHINE = 'tumblr'
- _LOGIN_URL = 'https://www.tumblr.com/login'
- _TESTS = [{
- 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
- 'md5': '479bb068e5b16462f5176a6828829767',
- 'info_dict': {
- 'id': '54196191430',
- 'ext': 'mp4',
- 'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
- 'description': 'md5:37db8211e40b50c7c44e95da14f630b7',
- 'thumbnail': r're:http://.*\.jpg',
- }
- }, {
- 'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
- 'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359',
- 'info_dict': {
- 'id': '90208453769',
- 'ext': 'mp4',
- 'title': '5SOS STRUM ;]',
- 'description': 'md5:dba62ac8639482759c8eb10ce474586a',
- 'thumbnail': r're:http://.*\.jpg',
- }
- }, {
- 'url': 'http://hdvideotest.tumblr.com/post/130323439814/test-description-for-my-hd-video',
- 'md5': '7ae503065ad150122dc3089f8cf1546c',
- 'info_dict': {
- 'id': '130323439814',
- 'ext': 'mp4',
- 'title': 'HD Video Testing \u2014 Test description for my HD video',
- 'description': 'md5:97cc3ab5fcd27ee4af6356701541319c',
- 'thumbnail': r're:http://.*\.jpg',
- },
- 'params': {
- 'format': 'hd',
- },
- }, {
- 'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching',
- 'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab',
- 'info_dict': {
- 'id': 'Wmur',
- 'ext': 'mp4',
- 'title': 'naked smoking & stretching',
- 'upload_date': '20150506',
- 'timestamp': 1430931613,
- 'age_limit': 18,
- 'uploader_id': '1638622',
- 'uploader': 'naked-yogi',
- },
- 'add_ie': ['Vidme'],
- }, {
- 'url': 'http://camdamage.tumblr.com/post/98846056295/',
- 'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
- 'info_dict': {
- 'id': '105463834',
- 'ext': 'mp4',
- 'title': 'Cam Damage-HD 720p',
- 'uploader': 'John Moyer',
- 'uploader_id': 'user32021558',
- },
- 'add_ie': ['Vimeo'],
- }, {
- 'url': 'http://sutiblr.tumblr.com/post/139638707273',
- 'md5': '2dd184b3669e049ba40563a7d423f95c',
- 'info_dict': {
- 'id': 'ir7qBEIKqvq',
- 'ext': 'mp4',
- 'title': 'Vine by sutiblr',
- 'alt_title': 'Vine by sutiblr',
- 'uploader': 'sutiblr',
- 'uploader_id': '1198993975374495744',
- 'upload_date': '20160220',
- 'like_count': int,
- 'comment_count': int,
- 'repost_count': int,
- },
- 'add_ie': ['Vine'],
- }, {
- 'url': 'http://vitasidorkina.tumblr.com/post/134652425014/joskriver-victoriassecret-invisibility-or',
- 'md5': '01c12ceb82cbf6b2fe0703aa56b3ad72',
- 'info_dict': {
- 'id': '-7LnUPGlSo',
- 'ext': 'mp4',
- 'title': 'Video by victoriassecret',
- 'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat',
- 'uploader_id': 'victoriassecret',
- 'thumbnail': r're:^https?://.*\.jpg'
- },
- 'add_ie': ['Instagram'],
- }]
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login page')
-
- login_form = self._hidden_inputs(login_page)
- login_form.update({
- 'user[email]': username,
- 'user[password]': password
- })
-
- response, urlh = self._download_webpage_handle(
- self._LOGIN_URL, None, 'Logging in',
- data=urlencode_postdata(login_form), headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'Referer': self._LOGIN_URL,
- })
-
- # Successful login
- if '/dashboard' in urlh.geturl():
- return
-
- login_errors = self._parse_json(
- self._search_regex(
- r'RegistrationForm\.errors\s*=\s*(\[.+?\])\s*;', response,
- 'login errors', default='[]'),
- None, fatal=False)
- if login_errors:
- raise ExtractorError(
- 'Unable to login: %s' % login_errors[0], expected=True)
-
- self.report_warning('Login has probably failed')
-
- def _real_extract(self, url):
- m_url = re.match(self._VALID_URL, url)
- video_id = m_url.group('id')
- blog = m_url.group('blog_name')
-
- url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
- webpage, urlh = self._download_webpage_handle(url, video_id)
-
- redirect_url = compat_str(urlh.geturl())
- if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
- raise ExtractorError(
- 'This Tumblr may contain sensitive media. '
- 'Disable safe mode in your account settings '
- 'at https://www.tumblr.com/settings/account#safe_mode',
- expected=True)
-
- iframe_url = self._search_regex(
- r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
- webpage, 'iframe url', default=None)
- if iframe_url is None:
- return self.url_result(redirect_url, 'Generic')
-
- iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')
-
- duration = None
- sources = []
-
- sd_url = self._search_regex(
- r'<source[^>]+src=(["\'])(?P<url>.+?)\1', iframe,
- 'sd video url', default=None, group='url')
- if sd_url:
- sources.append((sd_url, 'sd'))
-
- options = self._parse_json(
- self._search_regex(
- r'data-crt-options=(["\'])(?P<options>.+?)\1', iframe,
- 'hd video url', default='', group='options'),
- video_id, fatal=False)
- if options:
- duration = int_or_none(options.get('duration'))
- hd_url = options.get('hdUrl')
- if hd_url:
- sources.append((hd_url, 'hd'))
-
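- # sources is ordered SD before HD, so the enumerate() index doubles as the quality ranking.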
- formats = [{
- 'url': video_url,
- 'ext': 'mp4',
- 'format_id': format_id,
- 'height': int_or_none(self._search_regex(
- r'/(\d{3,4})$', video_url, 'height', default=None)),
- 'quality': quality,
- } for quality, (video_url, format_id) in enumerate(sources)]
-
- self._sort_formats(formats)
-
- # The <title> tag is the only place a title is reliably available; it may be
- # truncated, but other locations are missing for some videos
- video_title = self._html_search_regex(
- r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
- webpage, 'title')
-
- return {
- 'id': video_id,
- 'title': video_title,
- 'description': self._og_search_description(webpage, default=None),
- 'thumbnail': self._og_search_thumbnail(webpage, default=None),
- 'duration': duration,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/tutv.py b/youtube_dl/extractor/tutv.py
deleted file mode 100644
index 362318b24..000000000
--- a/youtube_dl/extractor/tutv.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import (
- compat_b64decode,
- compat_parse_qs,
-)
-
-
-class TutvIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tu\.tv/videos/(?P<id>[^/?]+)'
- _TEST = {
- 'url': 'http://tu.tv/videos/robots-futbolistas',
- 'md5': '0cd9e28ad270488911b0d2a72323395d',
- 'info_dict': {
- 'id': '2973058',
- 'ext': 'mp4',
- 'title': 'Robots futbolistas',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- internal_id = self._search_regex(r'codVideo=([0-9]+)', webpage, 'internal video ID')
-
- data_content = self._download_webpage(
- 'http://tu.tv/flvurl.php?codVideo=%s' % internal_id, video_id, 'Downloading video info')
- video_url = compat_b64decode(compat_parse_qs(data_content)['kpt'][0]).decode('utf-8')
-
- return {
- 'id': internal_id,
- 'url': video_url,
- 'title': self._og_search_title(webpage),
- }
diff --git a/youtube_dl/extractor/tv2.py b/youtube_dl/extractor/tv2.py
deleted file mode 100644
index d5071e8a5..000000000
--- a/youtube_dl/extractor/tv2.py
+++ /dev/null
@@ -1,145 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- float_or_none,
- js_to_json,
- parse_iso8601,
- remove_end,
-)
-
-
-class TV2IE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
- _TEST = {
- 'url': 'http://www.tv2.no/v/916509/',
- 'info_dict': {
- 'id': '916509',
- 'ext': 'mp4',
- 'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
- 'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
- 'timestamp': 1431715610,
- 'upload_date': '20150515',
- 'duration': 156.967,
- 'view_count': int,
- 'categories': list,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- formats = []
- format_urls = []
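- # Query the play API once per protocol and de-duplicate URLs reported by both manifests.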
- for protocol in ('HDS', 'HLS'):
- data = self._download_json(
- 'http://sumo.tv2.no/api/web/asset/%s/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % (video_id, protocol),
- video_id, 'Downloading play JSON')['playback']
- for item in data['items']['item']:
- video_url = item.get('url')
- if not video_url or video_url in format_urls:
- continue
- format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
- if not self._is_valid_url(video_url, video_id, format_id):
- continue
- format_urls.append(video_url)
- ext = determine_ext(video_url)
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id=format_id, fatal=False))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id=format_id, fatal=False))
- elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
- pass
- else:
- formats.append({
- 'url': video_url,
- 'format_id': format_id,
- 'tbr': int_or_none(item.get('bitrate')),
- 'filesize': int_or_none(item.get('fileSize')),
- })
- self._sort_formats(formats)
-
- asset = self._download_json(
- 'http://sumo.tv2.no/api/web/asset/%s.json' % video_id,
- video_id, 'Downloading metadata JSON')['asset']
-
- title = asset['title']
- description = asset.get('description')
- timestamp = parse_iso8601(asset.get('createTime'))
- duration = float_or_none(asset.get('accurateDuration') or asset.get('duration'))
- view_count = int_or_none(asset.get('views'))
- categories = asset.get('keywords', '').split(',')
-
- thumbnails = [{
- 'id': thumbnail.get('@type'),
- 'url': thumbnail.get('url'),
- } for _, thumbnail in asset.get('imageVersions', {}).items()]
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'timestamp': timestamp,
- 'duration': duration,
- 'view_count': view_count,
- 'categories': categories,
- 'formats': formats,
- }
-
-
-class TV2ArticleIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
- _TESTS = [{
- 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
- 'info_dict': {
- 'id': '6930542',
- 'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret',
- 'description': 'md5:339573779d3eea3542ffe12006190954',
- },
- 'playlist_count': 2,
- }, {
- 'url': 'http://www.tv2.no/a/6930542',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
-
- webpage = self._download_webpage(url, playlist_id)
-
- # Old embed pattern (looks unused nowadays)
- assets = re.findall(r'data-assetid=["\'](\d+)', webpage)
-
- if not assets:
- # New embed pattern
- for v in re.findall(r'TV2ContentboxVideo\(({.+?})\)', webpage):
- video = self._parse_json(
- v, playlist_id, transform_source=js_to_json, fatal=False)
- if not video:
- continue
- asset = video.get('assetId')
- if asset:
- assets.append(asset)
-
- entries = [
- self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2')
- for asset_id in assets]
-
- title = remove_end(self._og_search_title(webpage), ' - TV2.no')
- description = remove_end(self._og_search_description(webpage), ' - TV2.no')
-
- return self.playlist_result(entries, playlist_id, title, description)
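
TV2IE queries the same asset once per protocol (HDS and HLS), so the two play.json responses can list overlapping URLs; the extractor records each format URL on first sight and skips repeats before probing it. A small sketch of that de-duplication pass, on hypothetical items:

```python
def dedupe_format_items(items):
    # The HDS and HLS play.json responses can overlap, so each format URL
    # is recorded on first sight and later repeats are skipped.
    seen, unique = set(), []
    for item in items:
        url = item.get('url')
        if not url or url in seen:
            continue
        seen.add(url)
        unique.append(item)
    return unique

print(dedupe_format_items(
    [{'url': 'a.m3u8'}, {'url': 'a.m3u8'}, {'url': 'b.f4m'}]))
# -> [{'url': 'a.m3u8'}, {'url': 'b.f4m'}]
```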
diff --git a/youtube_dl/extractor/tv4.py b/youtube_dl/extractor/tv4.py
deleted file mode 100644
index a819d048c..000000000
--- a/youtube_dl/extractor/tv4.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- parse_iso8601,
-)
-
-
-class TV4IE(InfoExtractor):
- IE_DESC = 'tv4.se and tv4play.se'
- _VALID_URL = r'''(?x)https?://(?:www\.)?
- (?:
- tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
- tv4play\.se/
- (?:
- (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
- iframe/video/|
- film/|
- sport/|
- )
- )(?P<id>[0-9]+)'''
- _GEO_COUNTRIES = ['SE']
- _TESTS = [
- {
- 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
- 'md5': 'cb837212f342d77cec06e6dad190e96d',
- 'info_dict': {
- 'id': '2491650',
- 'ext': 'mp4',
- 'title': 'Kalla Fakta 5 (english subtitles)',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': int,
- 'upload_date': '20131125',
- },
- },
- {
- 'url': 'http://www.tv4play.se/iframe/video/3054113',
- 'md5': 'cb837212f342d77cec06e6dad190e96d',
- 'info_dict': {
- 'id': '3054113',
- 'ext': 'mp4',
- 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
- 'timestamp': int,
- 'upload_date': '20150130',
- },
- },
- {
- 'url': 'http://www.tv4play.se/sport/3060959',
- 'only_matching': True,
- },
- {
- 'url': 'http://www.tv4play.se/film/2378136',
- 'only_matching': True,
- },
- {
- 'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
- 'only_matching': True,
- },
- {
- 'url': 'http://www.tv4play.se/program/farang/3922081',
- 'only_matching': True,
- }
- ]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- info = self._download_json(
- 'https://playback-api.b17g.net/asset/%s' % video_id,
- video_id, 'Downloading video info JSON', query={
- 'service': 'tv4',
- 'device': 'browser',
- 'protocol': 'hls,dash',
- 'drm': 'widevine',
- })['metadata']
-
- title = info['title']
-
- manifest_url = self._download_json(
- 'https://playback-api.b17g.net/media/' + video_id,
- video_id, query={
- 'service': 'tv4',
- 'device': 'browser',
- 'protocol': 'hls',
- })['playbackItem']['manifestUrl']
- formats = self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False)
- formats.extend(self._extract_mpd_formats(
- manifest_url.replace('.m3u8', '.mpd'),
- video_id, mpd_id='dash', fatal=False))
- formats.extend(self._extract_f4m_formats(
- manifest_url.replace('.m3u8', '.f4m'),
- video_id, f4m_id='hds', fatal=False))
- formats.extend(self._extract_ism_formats(
- re.sub(r'\.ism/.+?\.m3u8', r'.ism/Manifest', manifest_url),
- video_id, ism_id='mss', fatal=False))
-
- if not formats and info.get('is_geo_restricted'):
- self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- # 'subtitles': subtitles,
- 'description': info.get('description'),
- 'timestamp': parse_iso8601(info.get('broadcast_date_time')),
- 'duration': int_or_none(info.get('duration')),
- 'thumbnail': info.get('image'),
- 'is_live': info.get('isLive') is True,
- 'series': info.get('seriesTitle'),
- 'season_number': int_or_none(info.get('seasonNumber')),
- 'episode': info.get('episodeTitle'),
- 'episode_number': int_or_none(info.get('episodeNumber')),
- }
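
TV4IE requests only an HLS manifest from the playback API and then derives the sibling DASH, HDS and Smooth Streaming manifests by rewriting the URL suffix, on the assumption that the CDN publishes all flavours next to each other. A sketch of just that rewriting, with a hypothetical manifest URL:

```python
import re

def derive_manifest_urls(m3u8_url):
    # Mirrors the suffix rewriting above: the same .ism asset is assumed
    # to be exposed under sibling manifest endpoints for each protocol.
    return {
        'hls': m3u8_url,
        'dash': m3u8_url.replace('.m3u8', '.mpd'),
        'hds': m3u8_url.replace('.m3u8', '.f4m'),
        'mss': re.sub(r'\.ism/.+?\.m3u8', '.ism/Manifest', m3u8_url),
    }

print(derive_manifest_urls('https://cdn.example/video.ism/master.m3u8'))
```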
diff --git a/youtube_dl/extractor/tv5mondeplus.py b/youtube_dl/extractor/tv5mondeplus.py
deleted file mode 100644
index 88b6baa31..000000000
--- a/youtube_dl/extractor/tv5mondeplus.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- determine_ext,
- extract_attributes,
- get_element_by_class,
- int_or_none,
- parse_duration,
- parse_iso8601,
-)
-
-
-class TV5MondePlusIE(InfoExtractor):
- IE_DESC = 'TV5MONDE+'
- _VALID_URL = r'https?://(?:www\.)?tv5mondeplus\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
- _TEST = {
- 'url': 'http://www.tv5mondeplus.com/toutes-les-videos/documentaire/tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
- 'md5': '12130fc199f020673138a83466542ec6',
- 'info_dict': {
- 'id': 'tdah-mon-amour-tele-quebec-tdah-mon-amour-ep001-enfants',
- 'ext': 'mp4',
- 'title': 'Tdah, mon amour - Enfants',
- 'description': 'md5:230e3aca23115afcf8006d1bece6df74',
- 'upload_date': '20170401',
- 'timestamp': 1491022860,
- }
- }
- _GEO_BYPASS = False
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- webpage = self._download_webpage(url, display_id)
-
- if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
- self.raise_geo_restricted(countries=['FR'])
-
- series = get_element_by_class('video-detail__title', webpage)
- title = episode = get_element_by_class(
- 'video-detail__subtitle', webpage) or series
- if series and series != title:
- title = '%s - %s' % (series, title)
- vpl_data = extract_attributes(self._search_regex(
- r'(<[^>]+class="video_player_loader"[^>]+>)',
- webpage, 'video player loader'))
-
- video_files = self._parse_json(
- vpl_data['data-broadcast'], display_id).get('files', [])
- formats = []
- for video_file in video_files:
- v_url = video_file.get('url')
- if not v_url:
- continue
- video_format = video_file.get('format') or determine_ext(v_url)
- if video_format == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- v_url, display_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'url': v_url,
- 'format_id': video_format,
- })
- self._sort_formats(formats)
-
- return {
- 'id': display_id,
- 'display_id': display_id,
- 'title': title,
- 'description': clean_html(get_element_by_class('video-detail__description', webpage)),
- 'thumbnail': vpl_data.get('data-image'),
- 'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
- 'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage)),
- 'formats': formats,
- 'episode': episode,
- 'series': series,
- }
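
TV5MondePlusIE builds the display title from two page elements: the episode subtitle is preferred, the series name is the fallback, and the two are joined when they differ. The same rule as a standalone sketch, with hypothetical values:

```python
def build_title(series, episode_subtitle):
    # Prefer the episode subtitle, fall back to the series name, and join
    # both when they differ -- as in _real_extract() above.
    title = episode_subtitle or series
    if series and series != title:
        title = '%s - %s' % (series, title)
    return title

print(build_title('Tdah, mon amour', 'Enfants'))  # Tdah, mon amour - Enfants
print(build_title('Tdah, mon amour', None))       # Tdah, mon amour
```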
diff --git a/youtube_dl/extractor/tva.py b/youtube_dl/extractor/tva.py
deleted file mode 100644
index 0b863df2f..000000000
--- a/youtube_dl/extractor/tva.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- float_or_none,
- smuggle_url,
-)
-
-
-class TVAIE(InfoExtractor):
- _VALID_URL = r'https?://videos\.tva\.ca/details/_(?P<id>\d+)'
- _TEST = {
- 'url': 'https://videos.tva.ca/details/_5596811470001',
- 'info_dict': {
- 'id': '5596811470001',
- 'ext': 'mp4',
- 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
- 'uploader_id': '5481942443001',
- 'upload_date': '20171003',
- 'timestamp': 1507064617,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }
- BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- video_data = self._download_json(
- 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
- 'Accept': 'application/json',
- }, query={
- 'appId': '5955fc5f23eec60006c951f1',
- })
-
- def get_attribute(key):
- for attribute in video_data.get('attributes', []):
- if attribute.get('key') == key:
- return attribute.get('value')
- return None
-
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'title': get_attribute('title'),
- 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
- 'description': get_attribute('description'),
- 'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
- 'duration': float_or_none(get_attribute('video-duration'), 1000),
- 'ie_key': 'BrightcoveNew',
- }
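
The TVA proxy API returns metadata as a list of key/value pairs, which get_attribute() above scans linearly for every lookup. One possible alternative, sketched on a hypothetical payload, is to fold the list into a dict once:

```python
def attributes_to_dict(video_data):
    # Fold the API's [{'key': ..., 'value': ...}, ...] attribute list into
    # a dict so each lookup becomes a single indexing operation.
    return {
        a['key']: a.get('value')
        for a in video_data.get('attributes', [])
        if isinstance(a, dict) and 'key' in a
    }

attrs = attributes_to_dict({'attributes': [{'key': 'title', 'value': 'Demo'}]})
print(attrs.get('title'))        # Demo
print(attrs.get('description'))  # None
```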
diff --git a/youtube_dl/extractor/tvnow.py b/youtube_dl/extractor/tvnow.py
deleted file mode 100644
index 9c8a8a0dc..000000000
--- a/youtube_dl/extractor/tvnow.py
+++ /dev/null
@@ -1,486 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- int_or_none,
- parse_iso8601,
- parse_duration,
- str_or_none,
- update_url_query,
- urljoin,
-)
-
-
-class TVNowBaseIE(InfoExtractor):
- _VIDEO_FIELDS = (
- 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
- 'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
- 'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
- 'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')
-
- def _call_api(self, path, video_id, query):
- return self._download_json(
- 'https://api.tvnow.de/v3/' + path, video_id, query=query)
-
- def _extract_video(self, info, display_id):
- video_id = compat_str(info['id'])
- title = info['title']
-
- paths = []
- for manifest_url in (info.get('manifest') or {}).values():
- if not manifest_url:
- continue
- manifest_url = update_url_query(manifest_url, {'filter': ''})
- path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
- if path in paths:
- continue
- paths.append(path)
-
- def url_repl(proto, suffix):
- return re.sub(
- r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
- r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
- '.ism/' + suffix, manifest_url))
-
- def make_urls(proto, suffix):
- urls = [url_repl(proto, suffix)]
- hd_url = urls[0].replace('/manifest/', '/ngvod/')
- if hd_url != urls[0]:
- urls.append(hd_url)
- return urls
-
- formats = []  # collect formats from both candidate URLs instead of overwriting
- for man_url in make_urls('dash', '.mpd'):
- formats.extend(self._extract_mpd_formats(
- man_url, video_id, mpd_id='dash', fatal=False))
- for man_url in make_urls('hss', 'Manifest'):
- formats.extend(self._extract_ism_formats(
- man_url, video_id, ism_id='mss', fatal=False))
- for man_url in make_urls('hls', '.m3u8'):
- formats.extend(self._extract_m3u8_formats(
- man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
- fatal=False))
- if formats:
- break
- else:
- if info.get('isDrm'):
- raise ExtractorError(
- 'Video %s is DRM protected' % video_id, expected=True)
- if info.get('geoblocked'):
- self.raise_geo_restricted()
- if not info.get('free', True):
- raise ExtractorError(
- 'Video %s is not available for free' % video_id, expected=True)
- self._sort_formats(formats)
-
- description = info.get('articleLong') or info.get('articleShort')
- timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
- duration = parse_duration(info.get('duration'))
-
- f = info.get('format', {})
-
- thumbnails = [{
- 'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
- }]
- thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
- if thumbnail:
- thumbnails.append({
- 'url': thumbnail,
- })
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'timestamp': timestamp,
- 'duration': duration,
- 'series': f.get('title'),
- 'season_number': int_or_none(info.get('season')),
- 'episode_number': int_or_none(info.get('episode')),
- 'episode': title,
- 'formats': formats,
- }
-
-
-class TVNowIE(TVNowBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
- (?P<show_id>[^/]+)/
- (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
- '''
-
- @classmethod
- def suitable(cls, url):
- return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url)
- else super(TVNowIE, cls).suitable(url))
-
- _TESTS = [{
- 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
- 'info_dict': {
- 'id': '331082',
- 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
- 'ext': 'mp4',
- 'title': 'Der neue Porsche 911 GT 3',
- 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
- 'timestamp': 1495994400,
- 'upload_date': '20170528',
- 'duration': 5283,
- 'series': 'GRIP - Das Motormagazin',
- 'season_number': 14,
- 'episode_number': 405,
- 'episode': 'Der neue Porsche 911 GT 3',
- },
- }, {
- # rtl2
- 'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
- 'only_matching': True,
- }, {
- # rtlnitro
- 'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
- 'only_matching': True,
- }, {
- # superrtl
- 'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
- 'only_matching': True,
- }, {
- # ntv
- 'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
- 'only_matching': True,
- }, {
- # vox
- 'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
- 'only_matching': True,
- }, {
- # rtlplus
- 'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- display_id = '%s/%s' % mobj.group('show_id', 'id')
-
- info = self._call_api(
- 'movies/' + display_id, display_id, query={
- 'fields': ','.join(self._VIDEO_FIELDS),
- })
-
- return self._extract_video(info, display_id)
-
-
-class TVNowNewIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?P<base_url>https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/
- (?:shows|serien))/
- (?P<show>[^/]+)-\d+/
- [^/]+/
- episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
- '''
-
- _TESTS = [{
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
- show, episode = mobj.group('show', 'episode')
- return self.url_result(
- # Rewrite new URLs to the old format and use extraction via old API
- # at api.tvnow.de as a loophole for bypassing premium content checks
- '%s/%s/%s' % (base_url, show, episode),
- ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
-
-
-class TVNowNewBaseIE(InfoExtractor):
- def _call_api(self, path, video_id, query={}):
- result = self._download_json(
- 'https://apigw.tvnow.de/module/' + path, video_id, query=query)
- error = result.get('error')
- if error:
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, error), expected=True)
- return result
-
-
-r"""
-TODO: new apigw.tvnow.de based version of TVNowIE. Replace the old TVNowIE with it
-once api.tvnow.de is shut down. This version cannot bypass premium checks, and
-enabling it additionally requires importing try_get, unified_timestamp and
-url_or_none from ..utils.
-class TVNowIE(TVNowNewBaseIE):
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/
- (?:shows|serien)/[^/]+/
- (?:[^/]+/)+
- (?P<display_id>[^/?$&]+)-(?P<id>\d+)
- '''
-
- _TESTS = [{
- # episode with annual navigation
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
- 'info_dict': {
- 'id': '331082',
- 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
- 'ext': 'mp4',
- 'title': 'Der neue Porsche 911 GT 3',
- 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'timestamp': 1495994400,
- 'upload_date': '20170528',
- 'duration': 5283,
- 'series': 'GRIP - Das Motormagazin',
- 'season_number': 14,
- 'episode_number': 405,
- 'episode': 'Der neue Porsche 911 GT 3',
- },
- }, {
- # rtl2, episode with season navigation
- 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
- 'only_matching': True,
- }, {
- # rtlnitro
- 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
- 'only_matching': True,
- }, {
- # superrtl
- 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
- 'only_matching': True,
- }, {
- # ntv
- 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
- 'only_matching': True,
- }, {
- # vox
- 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
- 'only_matching': True,
- }, {
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
- 'only_matching': True,
- }]
-
- def _extract_video(self, info, url, display_id):
- config = info['config']
- source = config['source']
-
- video_id = compat_str(info.get('id') or source['videoId'])
- title = source['title'].strip()
-
- paths = []
- for manifest_url in (info.get('manifest') or {}).values():
- if not manifest_url:
- continue
- manifest_url = update_url_query(manifest_url, {'filter': ''})
- path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
- if path in paths:
- continue
- paths.append(path)
-
- def url_repl(proto, suffix):
- return re.sub(
- r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
- r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
- '.ism/' + suffix, manifest_url))
-
- formats = self._extract_mpd_formats(
- url_repl('dash', '.mpd'), video_id,
- mpd_id='dash', fatal=False)
- formats.extend(self._extract_ism_formats(
- url_repl('hss', 'Manifest'),
- video_id, ism_id='mss', fatal=False))
- formats.extend(self._extract_m3u8_formats(
- url_repl('hls', '.m3u8'), video_id, 'mp4',
- 'm3u8_native', m3u8_id='hls', fatal=False))
- if formats:
- break
- else:
- if try_get(info, lambda x: x['rights']['isDrm']):
- raise ExtractorError(
- 'Video %s is DRM protected' % video_id, expected=True)
- if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
- self.raise_geo_restricted()
- if not info.get('free', True):
- raise ExtractorError(
- 'Video %s is not available for free' % video_id, expected=True)
- self._sort_formats(formats)
-
- description = source.get('description')
- thumbnail = url_or_none(source.get('poster'))
- timestamp = unified_timestamp(source.get('previewStart'))
- duration = parse_duration(source.get('length'))
-
- series = source.get('format')
- season_number = int_or_none(self._search_regex(
- r'staffel-(\d+)', url, 'season number', default=None))
- episode_number = int_or_none(self._search_regex(
- r'episode-(\d+)', url, 'episode number', default=None))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'timestamp': timestamp,
- 'duration': duration,
- 'series': series,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'episode': title,
- 'formats': formats,
- }
-
- def _real_extract(self, url):
- display_id, video_id = re.match(self._VALID_URL, url).groups()
- info = self._call_api('player/' + video_id, video_id)
- return self._extract_video(info, video_id, display_id)
-"""
-
-
-class TVNowListBaseIE(TVNowNewBaseIE):
- _SHOW_VALID_URL = r'''(?x)
- (?P<base_url>
- https?://
- (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
- [^/?#&]+-(?P<show_id>\d+)
- )
- '''
-
- @classmethod
- def suitable(cls, url):
- return (False if TVNowNewIE.suitable(url)
- else super(TVNowListBaseIE, cls).suitable(url))
-
- def _extract_items(self, url, show_id, list_id, query):
- items = self._call_api(
- 'teaserrow/format/episode/' + show_id, list_id,
- query=query)['items']
-
- entries = []
- for item in items:
- if not isinstance(item, dict):
- continue
- item_url = urljoin(url, item.get('url'))
- if not item_url:
- continue
- video_id = str_or_none(item.get('id') or item.get('videoId'))
- item_title = item.get('subheadline') or item.get('text')
- entries.append(self.url_result(
- item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
- video_title=item_title))
-
- return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
-
-
-class TVNowSeasonIE(TVNowListBaseIE):
- _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
- _TESTS = [{
- 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
- 'info_dict': {
- 'id': '1815/13',
- },
- 'playlist_mincount': 22,
- }]
-
- def _real_extract(self, url):
- _, show_id, season_id = re.match(self._VALID_URL, url).groups()
- return self._extract_items(
- url, show_id, season_id, {'season': season_id})
-
-
-class TVNowAnnualIE(TVNowListBaseIE):
- _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
- _TESTS = [{
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
- 'info_dict': {
- 'id': '1669/2017-05',
- },
- 'playlist_mincount': 2,
- }]
-
- def _real_extract(self, url):
- _, show_id, year, month = re.match(self._VALID_URL, url).groups()
- return self._extract_items(
- url, show_id, '%s-%s' % (year, month), {
- 'year': int(year),
- 'month': int(month),
- })
-
-
-class TVNowShowIE(TVNowListBaseIE):
- _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
- _TESTS = [{
- # annual navigationType
- 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
- 'info_dict': {
- 'id': '1669',
- },
- 'playlist_mincount': 73,
- }, {
- # season navigationType
- 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
- 'info_dict': {
- 'id': '11471',
- },
- 'playlist_mincount': 3,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url)
- else super(TVNowShowIE, cls).suitable(url))
-
- def _real_extract(self, url):
- base_url, show_id = re.match(self._VALID_URL, url).groups()
-
- result = self._call_api(
- 'teaserrow/format/navigation/' + show_id, show_id)
-
- items = result['items']
-
- entries = []
- navigation = result.get('navigationType')
- if navigation == 'annual':
- for item in items:
- if not isinstance(item, dict):
- continue
- year = int_or_none(item.get('year'))
- if year is None:
- continue
- months = item.get('months')
- if not isinstance(months, list):
- continue
- for month_dict in months:
- if not isinstance(month_dict, dict) or not month_dict:
- continue
- month_number = int_or_none(list(month_dict.keys())[0])
- if month_number is None:
- continue
- entries.append(self.url_result(
- '%s/%04d-%02d' % (base_url, year, month_number),
- ie=TVNowAnnualIE.ie_key()))
- elif navigation == 'season':
- for item in items:
- if not isinstance(item, dict):
- continue
- season_number = int_or_none(item.get('season'))
- if season_number is None:
- continue
- entries.append(self.url_result(
- '%s/staffel-%d' % (base_url, season_number),
- ie=TVNowSeasonIE.ie_key()))
- else:
- raise ExtractorError('Unknown navigationType')
-
- return self.playlist_result(entries, show_id)
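
The core trick in the TVNOW extractors is url_repl(): a single .ism asset encodes its manifest flavour both in a protocol token (hls/dash/hss) in the URL and in the trailing suffix, so one manifest URL can be rewritten into all the others. A standalone sketch of the same substitution, with a hypothetical URL:

```python
import re

def url_repl(manifest_url, proto, suffix):
    # Swap the protocol token and the manifest suffix in one pass,
    # exactly as url_repl() does in the extractor above.
    return re.sub(
        r'(?:hls|dash|hss)([.-])', proto + r'\1',
        re.sub(r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
               '.ism/' + suffix, manifest_url))

src = 'https://hls.example/clip.ism/clip.m3u8'
print(url_repl(src, 'dash', '.mpd'))     # https://dash.example/clip.ism/.mpd
print(url_repl(src, 'hss', 'Manifest'))  # https://hss.example/clip.ism/Manifest
```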
diff --git a/youtube_dl/extractor/tvplay.py b/youtube_dl/extractor/tvplay.py
deleted file mode 100644
index d82d48f94..000000000
--- a/youtube_dl/extractor/tvplay.py
+++ /dev/null
@@ -1,557 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- parse_iso8601,
- qualities,
- smuggle_url,
- try_get,
- unsmuggle_url,
- update_url_query,
- url_or_none,
-)
-
-
-class TVPlayIE(InfoExtractor):
- IE_NAME = 'mtg'
- IE_DESC = 'MTG services'
- _VALID_URL = r'''(?x)
- (?:
- mtg:|
- https?://
- (?:www\.)?
- (?:
- tvplay(?:\.skaties)?\.lv(?:/parraides)?|
- (?:tv3play|play\.tv3)\.lt(?:/programos)?|
- tv3play(?:\.tv3)?\.ee/sisu|
- (?:tv(?:3|6|8|10)play|viafree)\.se/program|
- (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
- play\.nova(?:tv)?\.bg/programi
- )
- /(?:[^/]+/)+
- )
- (?P<id>\d+)
- '''
- _TESTS = [
- {
- 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
- 'md5': 'a1612fe0849455423ad8718fe049be21',
- 'info_dict': {
- 'id': '418113',
- 'ext': 'mp4',
- 'title': 'Kādi ir īri? - Viņas melo labāk',
- 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
- 'series': 'Viņas melo labāk',
- 'season': '2.sezona',
- 'season_number': 2,
- 'duration': 25,
- 'timestamp': 1406097056,
- 'upload_date': '20140723',
- },
- },
- {
- 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
- 'info_dict': {
- 'id': '409229',
- 'ext': 'flv',
- 'title': 'Moterys meluoja geriau',
- 'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
- 'series': 'Moterys meluoja geriau',
- 'episode_number': 47,
- 'season': '1 sezonas',
- 'season_number': 1,
- 'duration': 1330,
- 'timestamp': 1403769181,
- 'upload_date': '20140626',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true',
- 'info_dict': {
- 'id': '238551',
- 'ext': 'flv',
- 'title': 'Kodu keset linna 398537',
- 'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701',
- 'duration': 1257,
- 'timestamp': 1292449761,
- 'upload_date': '20101215',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
- 'info_dict': {
- 'id': '395385',
- 'ext': 'mp4',
- 'title': 'Husräddarna S02E07',
- 'description': 'md5:f210c6c89f42d4fc39faa551be813777',
- 'duration': 2574,
- 'timestamp': 1400596321,
- 'upload_date': '20140520',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
- 'info_dict': {
- 'id': '266636',
- 'ext': 'mp4',
- 'title': 'Den sista dokusåpan S01E08',
- 'description': 'md5:295be39c872520221b933830f660b110',
- 'duration': 1492,
- 'timestamp': 1330522854,
- 'upload_date': '20120229',
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
- 'info_dict': {
- 'id': '282756',
- 'ext': 'mp4',
- 'title': 'Antikjakten S01E10',
- 'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
- 'duration': 2646,
- 'timestamp': 1348575868,
- 'upload_date': '20120925',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
- 'info_dict': {
- 'id': '230898',
- 'ext': 'mp4',
- 'title': 'Anna Anka søker assistent - Ep. 8',
- 'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
- 'duration': 2656,
- 'timestamp': 1277720005,
- 'upload_date': '20100628',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
- 'info_dict': {
- 'id': '21873',
- 'ext': 'mp4',
- 'title': 'Budbringerne program 10',
- 'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
- 'duration': 1297,
- 'timestamp': 1254205102,
- 'upload_date': '20090929',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
- 'info_dict': {
- 'id': '361883',
- 'ext': 'mp4',
- 'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
- 'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
- 'duration': 2594,
- 'timestamp': 1393236292,
- 'upload_date': '20140224',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://play.novatv.bg/programi/zdravei-bulgariya/624952?autostart=true',
- 'info_dict': {
- 'id': '624952',
- 'ext': 'flv',
- 'title': 'Здравей, България (12.06.2015 г.) ',
- 'description': 'md5:99f3700451ac5bb71a260268b8daefd7',
- 'duration': 8838,
- 'timestamp': 1434100372,
- 'upload_date': '20150612',
- },
- 'params': {
- # rtmp download
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
- 'only_matching': True,
- },
- {
- 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
- 'only_matching': True,
- },
- {
- 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
- 'only_matching': True,
- },
- {
- # views is null
- 'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
- 'only_matching': True,
- },
- {
- 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
- 'only_matching': True,
- },
- {
- 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
- 'only_matching': True,
- },
- {
- 'url': 'mtg:418113',
- 'only_matching': True,
- }
- ]
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
- self._initialize_geo_bypass({
- 'countries': smuggled_data.get('geo_countries'),
- })
-
- video_id = self._match_id(url)
- geo_country = self._search_regex(
- r'https?://[^/]+\.([a-z]{2})', url,
- 'geo country', default=None)
- if geo_country:
- self._initialize_geo_bypass({'countries': [geo_country.upper()]})
- video = self._download_json(
- 'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
-
- title = video['title']
-
- try:
- streams = self._download_json(
- 'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id,
- video_id, 'Downloading streams JSON')
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- msg = self._parse_json(e.cause.read().decode('utf-8'), video_id)
- raise ExtractorError(msg['msg'], expected=True)
- raise
-
- quality = qualities(['hls', 'medium', 'high'])
- formats = []
- for format_id, video_url in streams.get('streams', {}).items():
- video_url = url_or_none(video_url)
- if not video_url:
- continue
- ext = determine_ext(video_url)
- if ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- update_url_query(video_url, {
- 'hdcore': '3.5.0',
- 'plugin': 'aasp-3.5.0.151.81'
- }), video_id, f4m_id='hds', fatal=False))
- elif ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- fmt = {
- 'format_id': format_id,
- 'quality': quality(format_id),
- 'ext': ext,
- }
- if video_url.startswith('rtmp'):
- if smuggled_data.get('skip_rtmp'):
- continue
- m = re.search(
- r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
- if not m:
- continue
- fmt.update({
- 'ext': 'flv',
- 'url': m.group('url'),
- 'app': m.group('app'),
- 'play_path': m.group('playpath'),
- 'preference': -1,
- })
- else:
- fmt.update({
- 'url': video_url,
- })
- formats.append(fmt)
-
- if not formats and video.get('is_geo_blocked'):
- self.raise_geo_restricted(
- 'This content might not be available in your country due to copyright reasons')
-
- self._sort_formats(formats)
-
- # TODO: webvtt in m3u8
- subtitles = {}
- sami_path = video.get('sami_path')
- if sami_path:
- lang = self._search_regex(
- r'_([a-z]{2})\.xml', sami_path, 'lang',
- default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1])
- subtitles[lang] = [{
- 'url': sami_path,
- }]
-
- series = video.get('format_title')
- episode_number = int_or_none(video.get('format_position', {}).get('episode'))
- season = video.get('_embedded', {}).get('season', {}).get('title')
- season_number = int_or_none(video.get('format_position', {}).get('season'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video.get('description'),
- 'series': series,
- 'episode_number': episode_number,
- 'season': season,
- 'season_number': season_number,
- 'duration': int_or_none(video.get('duration')),
- 'timestamp': parse_iso8601(video.get('created_at')),
- 'view_count': try_get(video, lambda x: x['views']['total'], int),
- 'age_limit': int_or_none(video.get('age_limit', 0)),
- 'formats': formats,
- 'subtitles': subtitles,
- }
-
-
-class ViafreeIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- (?:www\.)?
- viafree\.
- (?:
- (?:dk|no)/programmer|
- se/program
- )
- /(?:[^/]+/)+(?P<id>[^/?#&]+)
- '''
- _TESTS = [{
- 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
- 'info_dict': {
- 'id': '395375',
- 'ext': 'mp4',
- 'title': 'Husräddarna S02E02',
- 'description': 'md5:4db5c933e37db629b5a2f75dfb34829e',
- 'series': 'Husräddarna',
- 'season': 'Säsong 2',
- 'season_number': 2,
- 'duration': 2576,
- 'timestamp': 1400596321,
- 'upload_date': '20140520',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [TVPlayIE.ie_key()],
- }, {
- # with relatedClips
- 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1',
- 'info_dict': {
- 'id': '758770',
- 'ext': 'mp4',
- 'title': 'Sommaren med YouTube-stjärnorna S01E01',
- 'description': 'md5:2bc69dce2c4bb48391e858539bbb0e3f',
- 'series': 'Sommaren med YouTube-stjärnorna',
- 'season': 'Säsong 1',
- 'season_number': 1,
- 'duration': 1326,
- 'timestamp': 1470905572,
- 'upload_date': '20160811',
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [TVPlayIE.ie_key()],
- }, {
- # Different og:image URL schema
- 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',
- 'only_matching': True,
- }, {
- 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
- 'only_matching': True,
- }, {
- 'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- data = self._parse_json(
- self._search_regex(
- r'(?s)window\.App\s*=\s*({.+?})\s*;\s*</script',
- webpage, 'data', default='{}'),
- video_id, transform_source=lambda x: re.sub(
- r'(?s)function\s+[a-zA-Z_][\da-zA-Z_]*\s*\([^)]*\)\s*{[^}]*}\s*',
- 'null', x), fatal=False)
-
- video_id = None
-
- if data:
- video_id = try_get(
- data, lambda x: x['context']['dispatcher']['stores'][
- 'ContentPageProgramStore']['currentVideo']['id'],
- compat_str)
-
- # Fallback #1 (extract from og:image URL schema)
- if not video_id:
- thumbnail = self._og_search_thumbnail(webpage, default=None)
- if thumbnail:
- video_id = self._search_regex(
- # Patterns seen:
- # http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/inbox/765166/a2e95e5f1d735bab9f309fa345cc3f25.jpg
- # http://cdn.playapi.mtgx.tv/imagecache/600x315/cloud/content-images/seasons/15204/758770/4a5ba509ca8bc043e1ebd1a76131cdf2.jpg
- r'https?://[^/]+/imagecache/(?:[^/]+/)+(\d{6,})/',
- thumbnail, 'video id', default=None)
-
- # Fallback #2. Extract from raw JSON string.
- # May extract wrong video id if relatedClips is present.
- if not video_id:
- video_id = self._search_regex(
- r'currentVideo["\']\s*:\s*.+?["\']id["\']\s*:\s*["\'](\d{6,})',
- webpage, 'video id')
-
- return self.url_result(
- smuggle_url(
- 'mtg:%s' % video_id,
- {
- 'geo_countries': [
- compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1]],
- # rtmp host mtgfs.fplive.net for viafree is unresolvable
- 'skip_rtmp': True,
- }),
- ie=TVPlayIE.ie_key(), video_id=video_id)
-
-
-class TVPlayHomeIE(InfoExtractor):
- _VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
- _TESTS = [{
- 'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
- 'info_dict': {
- 'id': '366367',
- 'ext': 'mp4',
- 'title': 'Aferistai',
- 'description': 'Aferistai. Kalėdinė pasaka.',
- 'series': 'Aferistai [N-7]',
- 'season': '1 sezonas',
- 'season_number': 1,
- 'duration': 464,
- 'timestamp': 1394209658,
- 'upload_date': '20140307',
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [TVPlayIE.ie_key()],
- }, {
- 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
- 'only_matching': True,
- }, {
- 'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_id = self._search_regex(
- r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
-
- if len(video_id) < 8:
- return self.url_result(
- 'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
-
- m3u8_url = self._search_regex(
- r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'm3u8 url', group='url')
-
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- self._sort_formats(formats)
-
- title = self._search_regex(
- r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'title', default=None, group='value') or self._html_search_meta(
- 'title', webpage, default=None) or self._og_search_title(
- webpage)
-
- description = self._html_search_meta(
- 'description', webpage,
- default=None) or self._og_search_description(webpage)
-
- thumbnail = self._search_regex(
- r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'thumbnail', default=None, group='url') or self._html_search_meta(
- 'thumbnail', webpage, default=None) or self._og_search_thumbnail(
- webpage)
-
- duration = int_or_none(self._search_regex(
- r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
- fatal=False))
-
- season = self._search_regex(
- (r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
- r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
- 'season', default=None, group='value')
- season_number = int_or_none(self._search_regex(
- r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
- default=None))
- episode = self._search_regex(
- (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
- r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
- 'episode', default=None, group='value')
- episode_number = int_or_none(self._search_regex(
- r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
- default=None))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'season': season,
- 'season_number': season_number,
- 'episode': episode,
- 'episode_number': episode_number,
- 'formats': formats,
- }
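
TVPlayIE hands RTMP streams to the downloader split into server URL, application and play path, which it derives from the raw rtmp:// URL with a single regex. The same split as a standalone sketch (hypothetical URL):

```python
import re

def split_rtmp_url(video_url):
    # Split rtmp://host/app/playpath into the pieces an RTMP downloader
    # expects, using the same regex as the extractor above.
    m = re.search(
        r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
    if not m:
        return None
    return {
        'url': m.group('url'),
        'app': m.group('app'),
        'play_path': m.group('playpath'),
    }

print(split_rtmp_url('rtmp://media.example/vod/mp4:clips/demo.mp4'))
# {'url': 'rtmp://media.example/vod', 'app': 'vod', 'play_path': 'mp4:clips/demo.mp4'}
```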
diff --git a/youtube_dl/extractor/twentyfourvideo.py b/youtube_dl/extractor/twentyfourvideo.py
deleted file mode 100644
index 1d66eeaff..000000000
--- a/youtube_dl/extractor/twentyfourvideo.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- parse_iso8601,
- int_or_none,
- xpath_attr,
- xpath_element,
-)
-
-
-class TwentyFourVideoIE(InfoExtractor):
- IE_NAME = '24video'
- _VALID_URL = r'''(?x)
- https?://
- (?P<host>
- (?:(?:www|porno)\.)?24video\.
- (?:net|me|xxx|sexy?|tube|adult|site)
- )/
- (?:
- video/(?:(?:view|xml)/)?|
- player/new24_play\.swf\?id=
- )
- (?P<id>\d+)
- '''
-
- _TESTS = [{
- 'url': 'http://www.24video.net/video/view/1044982',
- 'md5': 'e09fc0901d9eaeedac872f154931deeb',
- 'info_dict': {
- 'id': '1044982',
- 'ext': 'mp4',
- 'title': 'Эротика каменного века',
- 'description': 'Как смотрели порно в каменном веке.',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'SUPERTELO',
- 'duration': 31,
- 'timestamp': 1275937857,
- 'upload_date': '20100607',
- 'age_limit': 18,
- 'like_count': int,
- 'dislike_count': int,
- },
- }, {
- 'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
- 'only_matching': True,
- }, {
- 'url': 'http://www.24video.me/video/view/1044982',
- 'only_matching': True,
- }, {
- 'url': 'http://www.24video.tube/video/view/2363750',
- 'only_matching': True,
- }, {
- 'url': 'https://www.24video.site/video/view/2640421',
- 'only_matching': True,
- }, {
- 'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- host = mobj.group('host')
-
- webpage = self._download_webpage(
- 'http://%s/video/view/%s' % (host, video_id), video_id)
-
- title = self._og_search_title(webpage)
- description = self._html_search_regex(
- r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
- webpage, 'description', fatal=False, group='description')
- thumbnail = self._og_search_thumbnail(webpage)
- duration = int_or_none(self._og_search_property(
- 'duration', webpage, 'duration', fatal=False))
- timestamp = parse_iso8601(self._search_regex(
- r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
- webpage, 'upload date', fatal=False))
-
- uploader = self._html_search_regex(
- r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
- webpage, 'uploader', fatal=False)
-
- view_count = int_or_none(self._html_search_regex(
- r'<span class="video-views">(\d+) просмотр',
- webpage, 'view count', fatal=False))
- comment_count = int_or_none(self._html_search_regex(
- r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
- webpage, 'comment count', default=None))
-
- # Sets some cookies
- self._download_xml(
- 'http://%s/video/xml/%s?mode=init' % (host, video_id),
- video_id, 'Downloading init XML')
-
- video_xml = self._download_xml(
- 'http://%s/video/xml/%s?mode=play' % (host, video_id),
- video_id, 'Downloading video XML')
-
- video = xpath_element(video_xml, './/video', 'video', fatal=True)
-
- formats = [{
- 'url': xpath_attr(video, '', 'url', 'video URL', fatal=True),
- }]
-
- like_count = int_or_none(video.get('ratingPlus'))
- dislike_count = int_or_none(video.get('ratingMinus'))
- age_limit = 18 if video.get('adult') == 'true' else 0
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'duration': duration,
- 'timestamp': timestamp,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'age_limit': age_limit,
- 'formats': formats,
- }
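
The 24video extractor reads the media URL, rating counters and adult flag from attributes of the <video> element in the play XML. A minimal sketch of that parsing step with the standard library, on a hypothetical payload shaped like the real one:

```python
import xml.etree.ElementTree as ET

doc = ET.fromstring(
    '<response><video url="http://cdn.example/1.mp4"'
    ' ratingPlus="12" ratingMinus="3" adult="true"/></response>')
video = doc.find('.//video')

info = {
    'url': video.get('url'),
    'like_count': int(video.get('ratingPlus')),
    'dislike_count': int(video.get('ratingMinus')),
    # the site marks adult content with adult="true"
    'age_limit': 18 if video.get('adult') == 'true' else 0,
}
print(info)
```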
diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py
deleted file mode 100644
index 0500e33a6..000000000
--- a/youtube_dl/extractor/twitch.py
+++ /dev/null
@@ -1,731 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import itertools
-import re
-import random
-import json
-
-from .common import InfoExtractor
-from ..compat import (
- compat_kwargs,
- compat_parse_qs,
- compat_str,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
-)
-from ..utils import (
- clean_html,
- ExtractorError,
- float_or_none,
- int_or_none,
- orderedSet,
- parse_duration,
- parse_iso8601,
- qualities,
- try_get,
- unified_timestamp,
- update_url_query,
- url_or_none,
- urljoin,
-)
-
-
-class TwitchBaseIE(InfoExtractor):
- _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
-
- _API_BASE = 'https://api.twitch.tv'
- _USHER_BASE = 'https://usher.ttvnw.net'
- _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
- _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
- _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
- _NETRC_MACHINE = 'twitch'
-
- def _handle_error(self, response):
- if not isinstance(response, dict):
- return
- error = response.get('error')
- if error:
- raise ExtractorError(
- '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
- expected=True)
-
- def _call_api(self, path, item_id, *args, **kwargs):
- headers = kwargs.get('headers', {}).copy()
- headers['Client-ID'] = self._CLIENT_ID
- kwargs['headers'] = headers
- response = self._download_json(
- '%s/%s' % (self._API_BASE, path), item_id,
- *args, **compat_kwargs(kwargs))
- self._handle_error(response)
- return response
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- def fail(message):
- raise ExtractorError(
- 'Unable to login. Twitch said: %s' % message, expected=True)
-
- def login_step(page, urlh, note, data):
- form = self._hidden_inputs(page)
- form.update(data)
-
- page_url = urlh.geturl()
- post_url = self._search_regex(
- r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
- 'post url', default=self._LOGIN_POST_URL, group='url')
- post_url = urljoin(page_url, post_url)
-
- headers = {
- 'Referer': page_url,
- 'Origin': page_url,
- 'Content-Type': 'text/plain;charset=UTF-8',
- }
-
- response = self._download_json(
- post_url, None, note, data=json.dumps(form).encode(),
- headers=headers, expected_status=400)
- error = response.get('error_description') or response.get('error_code')
- if error:
- fail(error)
-
- if 'Authenticated successfully' in response.get('message', ''):
- return None, None
-
- redirect_url = urljoin(
- post_url,
- response.get('redirect') or response['redirect_path'])
- return self._download_webpage_handle(
- redirect_url, None, 'Downloading login redirect page',
- headers=headers)
-
- login_page, handle = self._download_webpage_handle(
- self._LOGIN_FORM_URL, None, 'Downloading login page')
-
- # Some TOR nodes and public proxies are blocked completely
- if 'blacklist_message' in login_page:
- fail(clean_html(login_page))
-
- redirect_page, handle = login_step(
- login_page, handle, 'Logging in', {
- 'username': username,
- 'password': password,
- 'client_id': self._CLIENT_ID,
- })
-
- # Successful login
- if not redirect_page:
- return
-
- if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
- # TODO: Add mechanism to request an SMS or phone call
- tfa_token = self._get_tfa_info('two-factor authentication token')
- login_step(redirect_page, handle, 'Submitting TFA token', {
- 'authy_token': tfa_token,
- 'remember_2fa': 'true',
- })
-
- def _prefer_source(self, formats):
- try:
- source = next(f for f in formats if f['format_id'] == 'Source')
- source['quality'] = 10
- except StopIteration:
- for f in formats:
- if '/chunked/' in f['url']:
- f.update({
- 'quality': 10,
- 'format_note': 'Source',
- })
- self._sort_formats(formats)
-
-
-class TwitchItemBaseIE(TwitchBaseIE):
- def _download_info(self, item, item_id):
- return self._extract_info(self._call_api(
- 'kraken/videos/%s%s' % (item, item_id), item_id,
- 'Downloading %s info JSON' % self._ITEM_TYPE))
-
- def _extract_media(self, item_id):
- info = self._download_info(self._ITEM_SHORTCUT, item_id)
- response = self._call_api(
- 'api/videos/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id,
- 'Downloading %s playlist JSON' % self._ITEM_TYPE)
- entries = []
- chunks = response['chunks']
- quality_keys = list(chunks.keys())  # avoid shadowing the imported qualities() helper
- for num, fragment in enumerate(zip(*chunks.values()), start=1):
- formats = []
- for fmt_num, fragment_fmt in enumerate(fragment):
- format_id = quality_keys[fmt_num]
- fmt = {
- 'url': fragment_fmt['url'],
- 'format_id': format_id,
- 'quality': 1 if format_id == 'live' else 0,
- }
- m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
- if m:
- fmt['height'] = int(m.group('height'))
- formats.append(fmt)
- self._sort_formats(formats)
- entry = dict(info)
- entry['id'] = '%s_%d' % (entry['id'], num)
- entry['title'] = '%s part %d' % (entry['title'], num)
- entry['formats'] = formats
- entries.append(entry)
- return self.playlist_result(entries, info['id'], info['title'])
-
- def _extract_info(self, info):
- status = info.get('status')
- if status == 'recording':
- is_live = True
- elif status == 'recorded':
- is_live = False
- else:
- is_live = None
- return {
- 'id': info['_id'],
- 'title': info.get('title') or 'Untitled Broadcast',
- 'description': info.get('description'),
- 'duration': int_or_none(info.get('length')),
- 'thumbnail': info.get('preview'),
- 'uploader': info.get('channel', {}).get('display_name'),
- 'uploader_id': info.get('channel', {}).get('name'),
- 'timestamp': parse_iso8601(info.get('recorded_at')),
- 'view_count': int_or_none(info.get('views')),
- 'is_live': is_live,
- }
-
- def _real_extract(self, url):
- return self._extract_media(self._match_id(url))
-
-
-class TwitchVideoIE(TwitchItemBaseIE):
- IE_NAME = 'twitch:video'
- _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
- _ITEM_TYPE = 'video'
- _ITEM_SHORTCUT = 'a'
-
- _TEST = {
- 'url': 'http://www.twitch.tv/riotgames/b/577357806',
- 'info_dict': {
- 'id': 'a577357806',
- 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
- },
- 'playlist_mincount': 12,
- 'skip': 'HTTP Error 404: Not Found',
- }
-
-
-class TwitchChapterIE(TwitchItemBaseIE):
- IE_NAME = 'twitch:chapter'
- _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
- _ITEM_TYPE = 'chapter'
- _ITEM_SHORTCUT = 'c'
-
- _TESTS = [{
- 'url': 'http://www.twitch.tv/acracingleague/c/5285812',
- 'info_dict': {
- 'id': 'c5285812',
- 'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
- },
- 'playlist_mincount': 3,
- 'skip': 'HTTP Error 404: Not Found',
- }, {
- 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
- 'only_matching': True,
- }]
-
-
-class TwitchVodIE(TwitchItemBaseIE):
- IE_NAME = 'twitch:vod'
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
- player\.twitch\.tv/\?.*?\bvideo=v
- )
- (?P<id>\d+)
- '''
- _ITEM_TYPE = 'vod'
- _ITEM_SHORTCUT = 'v'
-
- _TESTS = [{
- 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
- 'info_dict': {
- 'id': 'v6528877',
- 'ext': 'mp4',
- 'title': 'LCK Summer Split - Week 6 Day 1',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 17208,
- 'timestamp': 1435131709,
- 'upload_date': '20150624',
- 'uploader': 'Riot Games',
- 'uploader_id': 'riotgames',
- 'view_count': int,
- 'start_time': 310,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- # Untitled broadcast (title is None)
- 'url': 'http://www.twitch.tv/belkao_o/v/11230755',
- 'info_dict': {
- 'id': 'v11230755',
- 'ext': 'mp4',
- 'title': 'Untitled Broadcast',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 1638,
- 'timestamp': 1439746708,
- 'upload_date': '20150816',
- 'uploader': 'BelkAO_o',
- 'uploader_id': 'belkao_o',
- 'view_count': int,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'skip': 'HTTP Error 404: Not Found',
- }, {
- 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
- 'only_matching': True,
- }, {
- 'url': 'https://www.twitch.tv/videos/6528877',
- 'only_matching': True,
- }, {
- 'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
- 'only_matching': True,
- }, {
- 'url': 'https://www.twitch.tv/northernlion/video/291940395',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- item_id = self._match_id(url)
-
- info = self._download_info(self._ITEM_SHORTCUT, item_id)
- access_token = self._call_api(
- 'api/vods/%s/access_token' % item_id, item_id,
- 'Downloading %s access token' % self._ITEM_TYPE)
-
- formats = self._extract_m3u8_formats(
- '%s/vod/%s.m3u8?%s' % (
- self._USHER_BASE, item_id,
- compat_urllib_parse_urlencode({
- 'allow_source': 'true',
- 'allow_audio_only': 'true',
- 'allow_spectre': 'true',
- 'player': 'twitchweb',
- 'nauth': access_token['token'],
- 'nauthsig': access_token['sig'],
- })),
- item_id, 'mp4', entry_protocol='m3u8_native')
-
- self._prefer_source(formats)
- info['formats'] = formats
-
- parsed_url = compat_urllib_parse_urlparse(url)
- query = compat_parse_qs(parsed_url.query)
- if 't' in query:
- info['start_time'] = parse_duration(query['t'][0])
-
- if info.get('timestamp') is not None:
- info['subtitles'] = {
- 'rechat': [{
- 'url': update_url_query(
- 'https://rechat.twitch.tv/rechat-messages', {
- 'video_id': 'v%s' % item_id,
- 'start': info['timestamp'],
- }),
- 'ext': 'json',
- }],
- }
-
- return info
-
-
-class TwitchPlaylistBaseIE(TwitchBaseIE):
- _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d'
- _PAGE_LIMIT = 100
-
- def _extract_playlist(self, channel_id):
- info = self._call_api(
- 'kraken/channels/%s' % channel_id,
- channel_id, 'Downloading channel info JSON')
- channel_name = info.get('display_name') or info.get('name')
- entries = []
- offset = 0
- limit = self._PAGE_LIMIT
- broken_paging_detected = False
- counter_override = None
- for counter in itertools.count(1):
- response = self._call_api(
- self._PLAYLIST_PATH % (channel_id, offset, limit),
- channel_id,
- 'Downloading %s JSON page %s'
- % (self._PLAYLIST_TYPE, counter_override or counter))
- page_entries = self._extract_playlist_page(response)
- if not page_entries:
- break
- total = int_or_none(response.get('_total'))
- # Since the beginning of March 2016 twitch's paging mechanism has been
- # completely broken on the twitch side: it ignores the limit and simply
- # returns as many videos as the offset specifies. Work around this by
- # requesting all videos at once (i.e. with offset set to the total).
- # Upd: the pagination bug was fixed by twitch on 15.03.2016.
- if not broken_paging_detected and total and len(page_entries) > limit:
- self.report_warning(
- 'Twitch pagination is broken on twitch side, requesting all videos at once',
- channel_id)
- broken_paging_detected = True
- offset = total
- counter_override = '(all at once)'
- continue
- entries.extend(page_entries)
- if broken_paging_detected or (total and len(page_entries) >= total):
- break
- offset += limit
- return self.playlist_result(
- [self._make_url_result(entry) for entry in orderedSet(entries)],
- channel_id, channel_name)
-
- def _make_url_result(self, url):
- try:
- video_id = 'v%s' % TwitchVodIE._match_id(url)
- return self.url_result(url, TwitchVodIE.ie_key(), video_id=video_id)
- except AssertionError:
- return self.url_result(url)
-
- def _extract_playlist_page(self, response):
- videos = response.get('videos')
- return [video['url'] for video in videos] if videos else []
-
- def _real_extract(self, url):
- return self._extract_playlist(self._match_id(url))
-
-
-class TwitchProfileIE(TwitchPlaylistBaseIE):
- IE_NAME = 'twitch:profile'
- _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
- _PLAYLIST_TYPE = 'profile'
-
- _TESTS = [{
- 'url': 'http://www.twitch.tv/vanillatv/profile',
- 'info_dict': {
- 'id': 'vanillatv',
- 'title': 'VanillaTV',
- },
- 'playlist_mincount': 412,
- }, {
- 'url': 'http://m.twitch.tv/vanillatv/profile',
- 'only_matching': True,
- }]
-
-
-class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
- _VALID_URL_VIDEOS_BASE = r'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE
- _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type='
-
-
-class TwitchAllVideosIE(TwitchVideosBaseIE):
- IE_NAME = 'twitch:videos:all'
- _VALID_URL = r'%s/all' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
- _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
- _PLAYLIST_TYPE = 'all videos'
-
- _TESTS = [{
- 'url': 'https://www.twitch.tv/spamfish/videos/all',
- 'info_dict': {
- 'id': 'spamfish',
- 'title': 'Spamfish',
- },
- 'playlist_mincount': 869,
- }, {
- 'url': 'https://m.twitch.tv/spamfish/videos/all',
- 'only_matching': True,
- }]
-
-
-class TwitchUploadsIE(TwitchVideosBaseIE):
- IE_NAME = 'twitch:videos:uploads'
- _VALID_URL = r'%s/uploads' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
- _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
- _PLAYLIST_TYPE = 'uploads'
-
- _TESTS = [{
- 'url': 'https://www.twitch.tv/spamfish/videos/uploads',
- 'info_dict': {
- 'id': 'spamfish',
- 'title': 'Spamfish',
- },
- 'playlist_mincount': 0,
- }, {
- 'url': 'https://m.twitch.tv/spamfish/videos/uploads',
- 'only_matching': True,
- }]
-
-
-class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
- IE_NAME = 'twitch:videos:past-broadcasts'
- _VALID_URL = r'%s/past-broadcasts' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
- _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
- _PLAYLIST_TYPE = 'past broadcasts'
-
- _TESTS = [{
- 'url': 'https://www.twitch.tv/spamfish/videos/past-broadcasts',
- 'info_dict': {
- 'id': 'spamfish',
- 'title': 'Spamfish',
- },
- 'playlist_mincount': 0,
- }, {
- 'url': 'https://m.twitch.tv/spamfish/videos/past-broadcasts',
- 'only_matching': True,
- }]
-
-
-class TwitchHighlightsIE(TwitchVideosBaseIE):
- IE_NAME = 'twitch:videos:highlights'
- _VALID_URL = r'%s/highlights' % TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE
- _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
- _PLAYLIST_TYPE = 'highlights'
-
- _TESTS = [{
- 'url': 'https://www.twitch.tv/spamfish/videos/highlights',
- 'info_dict': {
- 'id': 'spamfish',
- 'title': 'Spamfish',
- },
- 'playlist_mincount': 805,
- }, {
- 'url': 'https://m.twitch.tv/spamfish/videos/highlights',
- 'only_matching': True,
- }]
-
-
-class TwitchStreamIE(TwitchBaseIE):
- IE_NAME = 'twitch:stream'
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:(?:www|go|m)\.)?twitch\.tv/|
- player\.twitch\.tv/\?.*?\bchannel=
- )
- (?P<id>[^/#?]+)
- '''
-
- _TESTS = [{
- 'url': 'http://www.twitch.tv/shroomztv',
- 'info_dict': {
- 'id': '12772022048',
- 'display_id': 'shroomztv',
- 'ext': 'mp4',
- 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
- 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
- 'is_live': True,
- 'timestamp': 1421928037,
- 'upload_date': '20150122',
- 'uploader': 'ShroomzTV',
- 'uploader_id': 'shroomztv',
- 'view_count': int,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.twitch.tv/miracle_doto#profile-0',
- 'only_matching': True,
- }, {
- 'url': 'https://player.twitch.tv/?channel=lotsofs',
- 'only_matching': True,
- }, {
- 'url': 'https://go.twitch.tv/food',
- 'only_matching': True,
- }, {
- 'url': 'https://m.twitch.tv/food',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (False
- if any(ie.suitable(url) for ie in (
- TwitchVideoIE,
- TwitchChapterIE,
- TwitchVodIE,
- TwitchProfileIE,
- TwitchAllVideosIE,
- TwitchUploadsIE,
- TwitchPastBroadcastsIE,
- TwitchHighlightsIE,
- TwitchClipsIE))
- else super(TwitchStreamIE, cls).suitable(url))
-
- def _real_extract(self, url):
- channel_id = self._match_id(url)
-
- stream = self._call_api(
- 'kraken/streams/%s?stream_type=all' % channel_id, channel_id,
- 'Downloading stream JSON').get('stream')
-
- if not stream:
- raise ExtractorError('%s is offline' % channel_id, expected=True)
-
- # The channel name may be typed in a different case than the original channel
- # name (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON), which would lead to
- # constructing an invalid m3u8 URL. Work around this by using the original
- # channel name from the stream JSON, falling back to lowercase if it's not
- # available.
- channel_id = stream.get('channel', {}).get('name') or channel_id.lower()
-
- access_token = self._call_api(
- 'api/channels/%s/access_token' % channel_id, channel_id,
- 'Downloading channel access token')
-
- query = {
- 'allow_source': 'true',
- 'allow_audio_only': 'true',
- 'allow_spectre': 'true',
- 'p': random.randint(1000000, 10000000),
- 'player': 'twitchweb',
- 'segment_preference': '4',
- 'sig': access_token['sig'].encode('utf-8'),
- 'token': access_token['token'].encode('utf-8'),
- }
- formats = self._extract_m3u8_formats(
- '%s/api/channel/hls/%s.m3u8?%s'
- % (self._USHER_BASE, channel_id, compat_urllib_parse_urlencode(query)),
- channel_id, 'mp4')
- self._prefer_source(formats)
-
- view_count = stream.get('viewers')
- timestamp = parse_iso8601(stream.get('created_at'))
-
- channel = stream['channel']
- title = self._live_title(channel.get('display_name') or channel.get('name'))
- description = channel.get('status')
-
- thumbnails = []
- for thumbnail_key, thumbnail_url in stream['preview'].items():
- m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
- if not m:
- continue
- thumbnails.append({
- 'url': thumbnail_url,
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
-
- return {
- 'id': compat_str(stream['_id']),
- 'display_id': channel_id,
- 'title': title,
- 'description': description,
- 'thumbnails': thumbnails,
- 'uploader': channel.get('display_name'),
- 'uploader_id': channel.get('name'),
- 'timestamp': timestamp,
- 'view_count': view_count,
- 'formats': formats,
- 'is_live': True,
- }
-
-
-class TwitchClipsIE(TwitchBaseIE):
- IE_NAME = 'twitch:clips'
- _VALID_URL = r'https?://(?:clips\.twitch\.tv/(?:[^/]+/)*|(?:www\.)?twitch\.tv/[^/]+/clip/)(?P<id>[^/?#&]+)'
-
- _TESTS = [{
- 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
- 'md5': '761769e1eafce0ffebfb4089cb3847cd',
- 'info_dict': {
- 'id': '42850523',
- 'ext': 'mp4',
- 'title': 'EA Play 2016 Live from the Novo Theatre',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'timestamp': 1465767393,
- 'upload_date': '20160612',
- 'creator': 'EA',
- 'uploader': 'stereotype_',
- 'uploader_id': '43566419',
- },
- }, {
- # multiple formats
- 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
- 'only_matching': True,
- }, {
- 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- status = self._download_json(
- 'https://clips.twitch.tv/api/v2/clips/%s/status' % video_id,
- video_id)
-
- formats = []
-
- for option in status['quality_options']:
- if not isinstance(option, dict):
- continue
- source = url_or_none(option.get('source'))
- if not source:
- continue
- formats.append({
- 'url': source,
- 'format_id': option.get('quality'),
- 'height': int_or_none(option.get('quality')),
- 'fps': int_or_none(option.get('frame_rate')),
- })
-
- self._sort_formats(formats)
-
- info = {
- 'formats': formats,
- }
-
- clip = self._call_api(
- 'kraken/clips/%s' % video_id, video_id, fatal=False, headers={
- 'Accept': 'application/vnd.twitchtv.v5+json',
- })
-
- if clip:
- quality_key = qualities(('tiny', 'small', 'medium'))
- thumbnails = []
- thumbnails_dict = clip.get('thumbnails')
- if isinstance(thumbnails_dict, dict):
- for thumbnail_id, thumbnail_url in thumbnails_dict.items():
- thumbnails.append({
- 'id': thumbnail_id,
- 'url': thumbnail_url,
- 'preference': quality_key(thumbnail_id),
- })
-
- info.update({
- 'id': clip.get('tracking_id') or video_id,
- 'title': clip.get('title') or video_id,
- 'duration': float_or_none(clip.get('duration')),
- 'views': int_or_none(clip.get('views')),
- 'timestamp': unified_timestamp(clip.get('created_at')),
- 'thumbnails': thumbnails,
- 'creator': try_get(clip, lambda x: x['broadcaster']['display_name'], compat_str),
- 'uploader': try_get(clip, lambda x: x['curator']['display_name'], compat_str),
- 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
- })
- else:
- info.update({
- 'title': video_id,
- 'id': video_id,
- })
-
- return info
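
For reference, the paging workaround in TwitchPlaylistBaseIE._extract_playlist above boils down to the following standalone sketch. fetch_page is a hypothetical stand-in for self._call_api and is not part of youtube-dl; the 'videos'/'_total' keys mirror the kraken response used above.

def fetch_all_entries(fetch_page, page_limit=100):
    # fetch_page(offset, limit) -> dict with 'videos' and '_total' keys,
    # mimicking the kraken videos endpoint.
    entries = []
    offset = 0
    broken_paging_detected = False
    while True:
        response = fetch_page(offset, page_limit)
        page_entries = response.get('videos') or []
        if not page_entries:
            break
        total = response.get('_total')
        # More entries than requested means the server ignored the limit:
        # restart once with offset == total to fetch everything in one go.
        if not broken_paging_detected and total and len(page_entries) > page_limit:
            broken_paging_detected = True
            offset = total
            continue
        entries.extend(page_entries)
        if broken_paging_detected or (total and len(page_entries) >= total):
            break
        offset += page_limit
    return entries

# Example with a fake two-page backend:
pages = {0: {'videos': ['a', 'b'], '_total': 3}, 2: {'videos': ['c'], '_total': 3}}
print(fetch_all_entries(lambda offset, limit: pages.get(offset, {'videos': []}), page_limit=2))
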
diff --git a/youtube_dl/extractor/twitter.py b/youtube_dl/extractor/twitter.py
deleted file mode 100644
index cebb6238c..000000000
--- a/youtube_dl/extractor/twitter.py
+++ /dev/null
@@ -1,575 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- determine_ext,
- dict_get,
- ExtractorError,
- float_or_none,
- int_or_none,
- remove_end,
- try_get,
- xpath_text,
-)
-
-from .periscope import PeriscopeIE
-
-
-class TwitterBaseIE(InfoExtractor):
- def _extract_formats_from_vmap_url(self, vmap_url, video_id):
- vmap_data = self._download_xml(vmap_url, video_id)
- video_url = xpath_text(vmap_data, './/MediaFile').strip()
- if determine_ext(video_url) == 'm3u8':
- return self._extract_m3u8_formats(
- video_url, video_id, ext='mp4', m3u8_id='hls',
- entry_protocol='m3u8_native')
- return [{
- 'url': video_url,
- }]
-
- @staticmethod
- def _search_dimensions_in_video_url(a_format, video_url):
- m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
- if m:
- a_format.update({
- 'width': int(m.group('width')),
- 'height': int(m.group('height')),
- })
-
-
-class TwitterCardIE(TwitterBaseIE):
- IE_NAME = 'twitter:card'
- _VALID_URL = r'https?://(?:www\.)?twitter\.com/i/(?P<path>cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
- _TESTS = [
- {
- 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
- # MD5 checksums vary depending on where the video is served from, so none is pinned here
- 'info_dict': {
- 'id': '560070183650213889',
- 'ext': 'mp4',
- 'title': 'Twitter web player',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 30.033,
- },
- },
- {
- 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
- 'md5': '7ee2a553b63d1bccba97fbed97d9e1c8',
- 'info_dict': {
- 'id': '623160978427936768',
- 'ext': 'mp4',
- 'title': 'Twitter web player',
- 'thumbnail': r're:^https?://.*$',
- },
- },
- {
- 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
- 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
- 'info_dict': {
- 'id': 'dq4Oj5quskI',
- 'ext': 'mp4',
- 'title': 'Ubuntu 11.10 Overview',
- 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
- 'upload_date': '20111013',
- 'uploader': 'OMG! Ubuntu!',
- 'uploader_id': 'omgubuntu',
- },
- 'add_ie': ['Youtube'],
- },
- {
- 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
- 'md5': '6dabeaca9e68cbb71c99c322a4b42a11',
- 'info_dict': {
- 'id': 'iBb2x00UVlv',
- 'ext': 'mp4',
- 'upload_date': '20151113',
- 'uploader_id': '1189339351084113920',
- 'uploader': 'ArsenalTerje',
- 'title': 'Vine by ArsenalTerje',
- 'timestamp': 1447451307,
- },
- 'add_ie': ['Vine'],
- }, {
- 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
- 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
- 'info_dict': {
- 'id': '705235433198714880',
- 'ext': 'mp4',
- 'title': 'Twitter web player',
- 'thumbnail': r're:^https?://.*',
- },
- }, {
- 'url': 'https://twitter.com/i/videos/752274308186120192',
- 'only_matching': True,
- },
- ]
-
- _API_BASE = 'https://api.twitter.com/1.1'
-
- def _parse_media_info(self, media_info, video_id):
- formats = []
- for media_variant in media_info.get('variants', []):
- media_url = media_variant['url']
- if media_url.endswith('.m3u8'):
- formats.extend(self._extract_m3u8_formats(media_url, video_id, ext='mp4', m3u8_id='hls'))
- elif media_url.endswith('.mpd'):
- formats.extend(self._extract_mpd_formats(media_url, video_id, mpd_id='dash'))
- else:
- tbr = int_or_none(dict_get(media_variant, ('bitRate', 'bitrate')), scale=1000)
- a_format = {
- 'url': media_url,
- 'format_id': 'http-%d' % tbr if tbr else 'http',
- 'tbr': tbr,
- }
- # Reported bitRate may be zero
- if not a_format['tbr']:
- del a_format['tbr']
-
- self._search_dimensions_in_video_url(a_format, media_url)
-
- formats.append(a_format)
- return formats
-
- def _extract_mobile_formats(self, username, video_id):
- webpage = self._download_webpage(
- 'https://mobile.twitter.com/%s/status/%s' % (username, video_id),
- video_id, 'Downloading mobile webpage',
- headers={
- # A recent mobile User-Agent is necessary for Twitter to issue the `gt` cookie
- 'User-Agent': 'Mozilla/5.0 (Android 6.0.1; Mobile; rv:54.0) Gecko/54.0 Firefox/54.0',
- })
- main_script_url = self._html_search_regex(
- r'<script[^>]+src="([^"]+main\.[^"]+)"', webpage, 'main script URL')
- main_script = self._download_webpage(
- main_script_url, video_id, 'Downloading main script')
- bearer_token = self._search_regex(
- r'BEARER_TOKEN\s*:\s*"([^"]+)"',
- main_script, 'bearer token')
- # https://developer.twitter.com/en/docs/tweets/post-and-engage/api-reference/get-statuses-show-id
- api_data = self._download_json(
- '%s/statuses/show/%s.json' % (self._API_BASE, video_id),
- video_id, 'Downloading API data',
- headers={
- 'Authorization': 'Bearer ' + bearer_token,
- })
- media_info = try_get(api_data, lambda o: o['extended_entities']['media'][0]['video_info']) or {}
- return self._parse_media_info(media_info, video_id)
-
- def _real_extract(self, url):
- path, video_id = re.search(self._VALID_URL, url).groups()
-
- config = None
- formats = []
- duration = None
-
- urls = [url]
- if path.startswith('cards/'):
- urls.append('https://twitter.com/i/videos/' + video_id)
-
- for u in urls:
- webpage = self._download_webpage(
- u, video_id, headers={'Referer': 'https://twitter.com/'})
-
- iframe_url = self._html_search_regex(
- r'<iframe[^>]+src="((?:https?:)?//(?:www\.youtube\.com/embed/[^"]+|(?:www\.)?vine\.co/v/\w+/card))"',
- webpage, 'video iframe', default=None)
- if iframe_url:
- return self.url_result(iframe_url)
-
- config = self._parse_json(self._html_search_regex(
- r'data-(?:player-)?config="([^"]+)"', webpage,
- 'data player config', default='{}'),
- video_id)
-
- if config.get('source_type') == 'vine':
- return self.url_result(config['player_url'], 'Vine')
-
- periscope_url = PeriscopeIE._extract_url(webpage)
- if periscope_url:
- return self.url_result(periscope_url, PeriscopeIE.ie_key())
-
- video_url = config.get('video_url') or config.get('playlist', [{}])[0].get('source')
-
- if video_url:
- if determine_ext(video_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(video_url, video_id, ext='mp4', m3u8_id='hls'))
- else:
- f = {
- 'url': video_url,
- }
-
- self._search_dimensions_in_video_url(f, video_url)
-
- formats.append(f)
-
- vmap_url = config.get('vmapUrl') or config.get('vmap_url')
- if vmap_url:
- formats.extend(
- self._extract_formats_from_vmap_url(vmap_url, video_id))
-
- media_info = None
-
- for entity in config.get('status', {}).get('entities', []):
- if 'mediaInfo' in entity:
- media_info = entity['mediaInfo']
-
- if media_info:
- formats.extend(self._parse_media_info(media_info, video_id))
- duration = float_or_none(media_info.get('duration', {}).get('nanos'), scale=1e9)
-
- username = config.get('user', {}).get('screen_name')
- if username:
- formats.extend(self._extract_mobile_formats(username, video_id))
-
- if formats:
- title = self._search_regex(r'<title>([^<]+)</title>', webpage, 'title')
- thumbnail = config.get('posterImageUrl') or config.get('image_src')
- duration = float_or_none(config.get('duration'), scale=1000) or duration
- break
-
- if not formats:
- headers = {
- 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
- 'Referer': url,
- }
- ct0 = self._get_cookies(url).get('ct0')
- if ct0:
- headers['csrf_token'] = ct0.value
- guest_token = self._download_json(
- '%s/guest/activate.json' % self._API_BASE, video_id,
- 'Downloading guest token', data=b'',
- headers=headers)['guest_token']
- headers['x-guest-token'] = guest_token
- self._set_cookie('api.twitter.com', 'gt', guest_token)
- config = self._download_json(
- '%s/videos/tweet/config/%s.json' % (self._API_BASE, video_id),
- video_id, headers=headers)
- track = config['track']
- vmap_url = track.get('vmapUrl')
- if vmap_url:
- formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
- else:
- playback_url = track['playbackUrl']
- if determine_ext(playback_url) == 'm3u8':
- formats = self._extract_m3u8_formats(
- playback_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls')
- else:
- formats = [{
- 'url': playback_url,
- }]
- title = 'Twitter web player'
- thumbnail = config.get('posterImage')
- duration = float_or_none(track.get('durationMs'), scale=1000)
-
- self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- }
-
-
-class TwitterIE(InfoExtractor):
- IE_NAME = 'twitter'
- _VALID_URL = r'https?://(?:www\.|m\.|mobile\.)?twitter\.com/(?:i/web|(?P<user_id>[^/]+))/status/(?P<id>\d+)'
- _TEMPLATE_URL = 'https://twitter.com/%s/status/%s'
- _TEMPLATE_STATUSES_URL = 'https://twitter.com/statuses/%s'
-
- _TESTS = [{
- 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
- 'info_dict': {
- 'id': '643211948184596480',
- 'ext': 'mp4',
- 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'description': 'FREE THE NIPPLE on Twitter: "FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ"',
- 'uploader': 'FREE THE NIPPLE',
- 'uploader_id': 'freethenipple',
- 'duration': 12.922,
- },
- }, {
- 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
- 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
- 'info_dict': {
- 'id': '657991469417025536',
- 'ext': 'mp4',
- 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
- 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
- 'thumbnail': r're:^https?://.*\.png',
- 'uploader': 'Gifs',
- 'uploader_id': 'giphz',
- },
- 'expected_warnings': ['height', 'width'],
- 'skip': 'Account suspended',
- }, {
- 'url': 'https://twitter.com/starwars/status/665052190608723968',
- 'info_dict': {
- 'id': '665052190608723968',
- 'ext': 'mp4',
- 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
- 'description': 'Star Wars on Twitter: "A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens."',
- 'uploader_id': 'starwars',
- 'uploader': 'Star Wars',
- },
- }, {
- 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
- 'info_dict': {
- 'id': '705235433198714880',
- 'ext': 'mp4',
- 'title': 'Brent Yarina - Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight.',
- 'description': 'Brent Yarina on Twitter: "Khalil Iverson\'s missed highlight dunk. And made highlight dunk. In one highlight."',
- 'uploader_id': 'BTNBrentYarina',
- 'uploader': 'Brent Yarina',
- },
- 'params': {
- # The same video as https://twitter.com/i/videos/tweet/705235433198714880
- # Test case of TwitterCardIE
- 'skip_download': True,
- },
- }, {
- 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
- 'info_dict': {
- 'id': '700207533655363584',
- 'ext': 'mp4',
- 'title': 'JG - BEAT PROD: @suhmeduh #Damndaniel',
- 'description': 'JG on Twitter: "BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ"',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'uploader': 'JG',
- 'uploader_id': 'jaydingeer',
- 'duration': 30.0,
- },
- }, {
- 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
- 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
- 'info_dict': {
- 'id': 'MIOxnrUteUd',
- 'ext': 'mp4',
- 'title': 'Vince Mancini - Vine of the day',
- 'description': 'Vince Mancini on Twitter: "Vine of the day https://t.co/xmTvRdqxWf"',
- 'uploader': 'Vince Mancini',
- 'uploader_id': 'Filmdrunk',
- 'timestamp': 1402826626,
- 'upload_date': '20140615',
- },
- 'add_ie': ['Vine'],
- }, {
- 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
- 'info_dict': {
- 'id': '719944021058060289',
- 'ext': 'mp4',
- 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
- 'description': 'Captain America on Twitter: "@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI"',
- 'uploader_id': 'captainamerica',
- 'uploader': 'Captain America',
- 'duration': 3.17,
- },
- }, {
- 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
- 'info_dict': {
- 'id': '1zqKVVlkqLaKB',
- 'ext': 'mp4',
- 'title': 'Sgt Kerry Schmidt - LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence',
- 'description': 'Sgt Kerry Schmidt on Twitter: "LIVE on #Periscope: Road rage, mischief, assault, rollover and fire in one occurrence https://t.co/EKrVgIXF3s"',
- 'upload_date': '20160923',
- 'uploader_id': 'OPP_HSD',
- 'uploader': 'Sgt Kerry Schmidt',
- 'timestamp': 1474613214,
- },
- 'add_ie': ['Periscope'],
- }, {
- # has mp4 formats via mobile API
- 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
- 'info_dict': {
- 'id': '852138619213144067',
- 'ext': 'mp4',
- 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
- 'description': 'عالم الأخبار on Twitter: "كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN"',
- 'uploader': 'عالم الأخبار',
- 'uploader_id': 'news_al3alm',
- 'duration': 277.4,
- },
- }, {
- 'url': 'https://twitter.com/i/web/status/910031516746514432',
- 'info_dict': {
- 'id': '910031516746514432',
- 'ext': 'mp4',
- 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'description': 'Préfet de Guadeloupe on Twitter: "[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo"',
- 'uploader': 'Préfet de Guadeloupe',
- 'uploader_id': 'Prefet971',
- 'duration': 47.48,
- },
- 'params': {
- 'skip_download': True, # requires ffmpeg
- },
- }, {
- # card via api.twitter.com/1.1/videos/tweet/config
- 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
- 'info_dict': {
- 'id': '1001551623938805763',
- 'ext': 'mp4',
- 'title': 're:.*?Shep is on a roll today.*?',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'description': 'md5:63b036c228772523ae1924d5f8e5ed6b',
- 'uploader': 'Lis Power',
- 'uploader_id': 'LisPower1',
- 'duration': 111.278,
- },
- 'params': {
- 'skip_download': True, # requires ffmpeg
- },
- }, {
- 'url': 'https://twitter.com/foobar/status/1087791357756956680',
- 'info_dict': {
- 'id': '1087791357756956680',
- 'ext': 'mp4',
- 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'description': 'md5:66d493500c013e3e2d434195746a7f78',
- 'uploader': 'Twitter',
- 'uploader_id': 'Twitter',
- 'duration': 61.567,
- },
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- twid = mobj.group('id')
-
- webpage, urlh = self._download_webpage_handle(
- self._TEMPLATE_STATUSES_URL % twid, twid)
-
- if 'twitter.com/account/suspended' in urlh.geturl():
- raise ExtractorError('Account suspended by Twitter.', expected=True)
-
- user_id = None
-
- redirect_mobj = re.match(self._VALID_URL, urlh.geturl())
- if redirect_mobj:
- user_id = redirect_mobj.group('user_id')
-
- if not user_id:
- user_id = mobj.group('user_id')
-
- username = remove_end(self._og_search_title(webpage), ' on Twitter')
-
- title = description = self._og_search_description(webpage).replace('\n', ' ').strip('“”')
-
- # Strip trailing t.co URLs, which would otherwise end up as junk like
- # 'https -_t.co_BJYgOjSeGA' in filenames
- title = re.sub(r'\s+(https?://[^ ]+)', '', title)
-
- info = {
- 'uploader_id': user_id,
- 'uploader': username,
- 'webpage_url': url,
- 'description': '%s on Twitter: "%s"' % (username, description),
- 'title': username + ' - ' + title,
- }
-
- mobj = re.search(r'''(?x)
- <video[^>]+class="animated-gif"(?P<more_info>[^>]+)>\s*
- <source[^>]+video-src="(?P<url>[^"]+)"
- ''', webpage)
-
- if mobj:
- more_info = mobj.group('more_info')
- height = int_or_none(self._search_regex(
- r'data-height="(\d+)"', more_info, 'height', fatal=False))
- width = int_or_none(self._search_regex(
- r'data-width="(\d+)"', more_info, 'width', fatal=False))
- thumbnail = self._search_regex(
- r'poster="([^"]+)"', more_info, 'poster', fatal=False)
- info.update({
- 'id': twid,
- 'url': mobj.group('url'),
- 'height': height,
- 'width': width,
- 'thumbnail': thumbnail,
- })
- return info
-
- twitter_card_url = None
- if 'class="PlayableMedia' in webpage:
- twitter_card_url = '%s//twitter.com/i/videos/tweet/%s' % (self.http_scheme(), twid)
- else:
- twitter_card_iframe_url = self._search_regex(
- r'data-full-card-iframe-url=([\'"])(?P<url>(?:(?!\1).)+)\1',
- webpage, 'Twitter card iframe URL', default=None, group='url')
- if twitter_card_iframe_url:
- twitter_card_url = compat_urlparse.urljoin(url, twitter_card_iframe_url)
-
- if twitter_card_url:
- info.update({
- '_type': 'url_transparent',
- 'ie_key': 'TwitterCard',
- 'url': twitter_card_url,
- })
- return info
-
- raise ExtractorError('There\'s no video in this tweet.')
-
-
-class TwitterAmplifyIE(TwitterBaseIE):
- IE_NAME = 'twitter:amplify'
- _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
-
- _TEST = {
- 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
- 'md5': '7df102d0b9fd7066b86f3159f8e81bf6',
- 'info_dict': {
- 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
- 'ext': 'mp4',
- 'title': 'Twitter Video',
- 'thumbnail': 're:^https?://.*',
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- vmap_url = self._html_search_meta(
- 'twitter:amplify:vmap', webpage, 'vmap url')
- formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
-
- thumbnails = []
- thumbnail = self._html_search_meta(
- 'twitter:image:src', webpage, 'thumbnail', fatal=False)
-
- def _find_dimension(target):
- w = int_or_none(self._html_search_meta(
- 'twitter:%s:width' % target, webpage, fatal=False))
- h = int_or_none(self._html_search_meta(
- 'twitter:%s:height' % target, webpage, fatal=False))
- return w, h
-
- if thumbnail:
- thumbnail_w, thumbnail_h = _find_dimension('image')
- thumbnails.append({
- 'url': thumbnail,
- 'width': thumbnail_w,
- 'height': thumbnail_h,
- })
-
- video_w, video_h = _find_dimension('player')
- formats[0].update({
- 'width': video_w,
- 'height': video_h,
- })
-
- return {
- 'id': video_id,
- 'title': 'Twitter Video',
- 'formats': formats,
- 'thumbnails': thumbnails,
- }
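
The fallback branch of TwitterCardIE._real_extract above performs a guest-token handshake before querying the player config. A minimal sketch of that flow, outside youtube-dl, might look as follows; it assumes the third-party requests library, BEARER_TOKEN is a placeholder for the public bearer token hard-coded in the extractor, and the endpoints are those in use when this code was written.

import requests

API_BASE = 'https://api.twitter.com/1.1'
BEARER_TOKEN = '...'  # placeholder for the public token used above

def fetch_player_config(video_id):
    headers = {'Authorization': 'Bearer ' + BEARER_TOKEN}
    # An empty POST to activate.json yields a short-lived guest token...
    guest_token = requests.post(
        API_BASE + '/guest/activate.json',
        headers=headers, data=b'').json()['guest_token']
    # ...which then authorizes the per-tweet player config request.
    headers['x-guest-token'] = guest_token
    return requests.get(
        '%s/videos/tweet/config/%s.json' % (API_BASE, video_id),
        headers=headers).json()
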
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py
deleted file mode 100644
index 2a4faecef..000000000
--- a/youtube_dl/extractor/udemy.py
+++ /dev/null
@@ -1,481 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_kwargs,
- compat_str,
- compat_urllib_request,
- compat_urlparse,
-)
-from ..utils import (
- determine_ext,
- extract_attributes,
- ExtractorError,
- float_or_none,
- int_or_none,
- js_to_json,
- sanitized_Request,
- try_get,
- unescapeHTML,
- url_or_none,
- urlencode_postdata,
-)
-
-
-class UdemyIE(InfoExtractor):
- IE_NAME = 'udemy'
- _VALID_URL = r'''(?x)
- https?://
- (?:[^/]+\.)?udemy\.com/
- (?:
- [^#]+\#/lecture/|
- lecture/view/?\?lectureId=|
- [^/]+/learn/v4/t/lecture/
- )
- (?P<id>\d+)
- '''
- _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
- _ORIGIN_URL = 'https://www.udemy.com'
- _NETRC_MACHINE = 'udemy'
-
- _TESTS = [{
- 'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
- 'md5': '98eda5b657e752cf945d8445e261b5c5',
- 'info_dict': {
- 'id': '160614',
- 'ext': 'mp4',
- 'title': 'Introduction and Installation',
- 'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876',
- 'duration': 579.29,
- },
- 'skip': 'Requires udemy account credentials',
- }, {
- # new URL schema
- 'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
- 'only_matching': True,
- }, {
- # no url in outputs format entry
- 'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
- 'only_matching': True,
- }, {
- # only outputs rendition
- 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
- 'only_matching': True,
- }, {
- 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757',
- 'only_matching': True,
- }]
-
- def _extract_course_info(self, webpage, video_id):
- course = self._parse_json(
- unescapeHTML(self._search_regex(
- r'ng-init=["\'].*\bcourse=({.+?})[;"\']',
- webpage, 'course', default='{}')),
- video_id, fatal=False) or {}
- course_id = course.get('id') or self._search_regex(
- [
- r'data-course-id=["\'](\d+)',
- r'&quot;courseId&quot;\s*:\s*(\d+)'
- ], webpage, 'course id')
- return course_id, course.get('title')
-
- def _enroll_course(self, base_url, webpage, course_id):
- def combine_url(base_url, url):
- return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
-
- checkout_url = unescapeHTML(self._search_regex(
- r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
- webpage, 'checkout url', group='url', default=None))
- if checkout_url:
- raise ExtractorError(
- 'Course %s is not free. You have to pay for it before you can download. '
- 'Use this URL to confirm purchase: %s'
- % (course_id, combine_url(base_url, checkout_url)),
- expected=True)
-
- enroll_url = unescapeHTML(self._search_regex(
- r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
- webpage, 'enroll url', group='url', default=None))
- if enroll_url:
- webpage = self._download_webpage(
- combine_url(base_url, enroll_url),
- course_id, 'Enrolling in the course',
- headers={'Referer': base_url})
- if '>You have enrolled in' in webpage:
- self.to_screen('%s: Successfully enrolled in the course' % course_id)
-
- def _download_lecture(self, course_id, lecture_id):
- return self._download_json(
- 'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?'
- % (course_id, lecture_id),
- lecture_id, 'Downloading lecture JSON', query={
- 'fields[lecture]': 'title,description,view_html,asset',
- 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
- })
-
- def _handle_error(self, response):
- if not isinstance(response, dict):
- return
- error = response.get('error')
- if error:
- error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message'))
- error_data = error.get('data')
- if error_data:
- error_str += ' - %s' % error_data.get('formErrors')
- raise ExtractorError(error_str, expected=True)
-
- def _download_webpage_handle(self, *args, **kwargs):
- headers = kwargs.get('headers', {}).copy()
- headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
- kwargs['headers'] = headers
- ret = super(UdemyIE, self)._download_webpage_handle(
- *args, **compat_kwargs(kwargs))
- if not ret:
- return ret
- webpage, _ = ret
- if any(p in webpage for p in (
- '>Please verify you are a human',
- 'Access to this page has been denied because we believe you are using automation tools to browse the website',
- '"_pxCaptcha"')):
- raise ExtractorError(
- 'Udemy asks you to solve a CAPTCHA. Login with browser, '
- 'solve CAPTCHA, then export cookies and pass cookie file to '
- 'youtube-dl with --cookies.', expected=True)
- return ret
-
- def _download_json(self, url_or_request, *args, **kwargs):
- headers = {
- 'X-Udemy-Snail-Case': 'true',
- 'X-Requested-With': 'XMLHttpRequest',
- }
- for cookie in self._downloader.cookiejar:
- if cookie.name == 'client_id':
- headers['X-Udemy-Client-Id'] = cookie.value
- elif cookie.name == 'access_token':
- headers['X-Udemy-Bearer-Token'] = cookie.value
- headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value
-
- if isinstance(url_or_request, compat_urllib_request.Request):
- for header, value in headers.items():
- url_or_request.add_header(header, value)
- else:
- url_or_request = sanitized_Request(url_or_request, headers=headers)
-
- response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
- self._handle_error(response)
- return response
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_popup = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login popup')
-
- def is_logged(webpage):
- return any(re.search(p, webpage) for p in (
- r'href=["\'](?:https://www\.udemy\.com)?/user/logout/',
- r'>Logout<'))
-
- # already logged in
- if is_logged(login_popup):
- return
-
- login_form = self._form_hidden_inputs('login-form', login_popup)
-
- login_form.update({
- 'email': username,
- 'password': password,
- })
-
- response = self._download_webpage(
- self._LOGIN_URL, None, 'Logging in',
- data=urlencode_postdata(login_form),
- headers={
- 'Referer': self._ORIGIN_URL,
- 'Origin': self._ORIGIN_URL,
- })
-
- if not is_logged(response):
- error = self._html_search_regex(
- r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
- response, 'error message', default=None)
- if error:
- raise ExtractorError('Unable to login: %s' % error, expected=True)
- raise ExtractorError('Unable to log in')
-
- def _real_extract(self, url):
- lecture_id = self._match_id(url)
-
- webpage = self._download_webpage(url, lecture_id)
-
- course_id, _ = self._extract_course_info(webpage, lecture_id)
-
- try:
- lecture = self._download_lecture(course_id, lecture_id)
- except ExtractorError as e:
- # The error most likely means we are not enrolled in the course
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- self._enroll_course(url, webpage, course_id)
- lecture = self._download_lecture(course_id, lecture_id)
- else:
- raise
-
- title = lecture['title']
- description = lecture.get('description')
-
- asset = lecture['asset']
-
- asset_type = asset.get('asset_type') or asset.get('assetType')
- if asset_type != 'Video':
- raise ExtractorError(
- 'Lecture %s is not a video' % lecture_id, expected=True)
-
- stream_url = asset.get('stream_url') or asset.get('streamUrl')
- if stream_url:
- youtube_url = self._search_regex(
- r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url, 'youtube URL', default=None)
- if youtube_url:
- return self.url_result(youtube_url, 'Youtube')
-
- video_id = compat_str(asset['id'])
- thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
- duration = float_or_none(asset.get('data', {}).get('duration'))
-
- subtitles = {}
- automatic_captions = {}
-
- formats = []
-
- def extract_output_format(src, f_id):
- return {
- 'url': src.get('url'),
- 'format_id': '%sp' % (src.get('height') or f_id),
- 'width': int_or_none(src.get('width')),
- 'height': int_or_none(src.get('height')),
- 'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
- 'vcodec': src.get('video_codec'),
- 'fps': int_or_none(src.get('frame_rate')),
- 'abr': int_or_none(src.get('audio_bitrate_in_kbps')),
- 'acodec': src.get('audio_codec'),
- 'asr': int_or_none(src.get('audio_sample_rate')),
- 'tbr': int_or_none(src.get('total_bitrate_in_kbps')),
- 'filesize': int_or_none(src.get('file_size_in_bytes')),
- }
-
- outputs = asset.get('data', {}).get('outputs')
- if not isinstance(outputs, dict):
- outputs = {}
-
- def add_output_format_meta(f, key):
- output = outputs.get(key)
- if isinstance(output, dict):
- output_format = extract_output_format(output, key)
- output_format.update(f)
- return output_format
- return f
-
- def extract_formats(source_list):
- if not isinstance(source_list, list):
- return
- for source in source_list:
- video_url = url_or_none(source.get('file') or source.get('src'))
- if not video_url:
- continue
- if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- continue
- format_id = source.get('label')
- f = {
- 'url': video_url,
- 'format_id': '%sp' % format_id,
- 'height': int_or_none(format_id),
- }
- if format_id:
- # Some videos contain additional metadata (e.g.
- # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
- f = add_output_format_meta(f, format_id)
- formats.append(f)
-
- def extract_subtitles(track_list):
- if not isinstance(track_list, list):
- return
- for track in track_list:
- if not isinstance(track, dict):
- continue
- if track.get('kind') != 'captions':
- continue
- src = url_or_none(track.get('src'))
- if not src:
- continue
- lang = track.get('language') or track.get(
- 'srclang') or track.get('label')
- sub_dict = automatic_captions if track.get(
- 'autogenerated') is True else subtitles
- sub_dict.setdefault(lang, []).append({
- 'url': src,
- })
-
- for url_kind in ('download', 'stream'):
- urls = asset.get('%s_urls' % url_kind)
- if isinstance(urls, dict):
- extract_formats(urls.get('Video'))
-
- captions = asset.get('captions')
- if isinstance(captions, list):
- for cc in captions:
- if not isinstance(cc, dict):
- continue
- cc_url = url_or_none(cc.get('url'))
- if not cc_url:
- continue
- lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
- sub_dict = (automatic_captions if cc.get('source') == 'auto'
- else subtitles)
- sub_dict.setdefault(lang or 'en', []).append({
- 'url': cc_url,
- })
-
- view_html = lecture.get('view_html')
- if view_html:
- view_html_urls = set()
- for source in re.findall(r'<source[^>]+>', view_html):
- attributes = extract_attributes(source)
- src = attributes.get('src')
- if not src:
- continue
- res = attributes.get('data-res')
- height = int_or_none(res)
- if src in view_html_urls:
- continue
- view_html_urls.add(src)
- if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- src, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False)
- for f in m3u8_formats:
- m = re.search(r'/hls_(?P<height>\d{3,4})_(?P<tbr>\d{2,})/', f['url'])
- if m:
- if not f.get('height'):
- f['height'] = int(m.group('height'))
- if not f.get('tbr'):
- f['tbr'] = int(m.group('tbr'))
- formats.extend(m3u8_formats)
- else:
- formats.append(add_output_format_meta({
- 'url': src,
- 'format_id': '%dp' % height if height else None,
- 'height': height,
- }, res))
-
- # React rendition, in use since 2017-04-15 (see
- # https://github.com/ytdl-org/youtube-dl/issues/12744)
- data = self._parse_json(
- self._search_regex(
- r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
- 'setup data', default='{}', group='data'), video_id,
- transform_source=unescapeHTML, fatal=False)
- if data and isinstance(data, dict):
- extract_formats(data.get('sources'))
- if not duration:
- duration = int_or_none(data.get('duration'))
- extract_subtitles(data.get('tracks'))
-
- if not subtitles and not automatic_captions:
- text_tracks = self._parse_json(
- self._search_regex(
- r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
- 'text tracks', default='{}', group='data'), video_id,
- transform_source=lambda s: js_to_json(unescapeHTML(s)),
- fatal=False)
- extract_subtitles(text_tracks)
-
- if not formats and outputs:
- for format_id, output in outputs.items():
- f = extract_output_format(output, format_id)
- if f.get('url'):
- formats.append(f)
-
- self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'formats': formats,
- 'subtitles': subtitles,
- 'automatic_captions': automatic_captions,
- }
-
-
-class UdemyCourseIE(UdemyIE):
- IE_NAME = 'udemy:course'
- _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.udemy.com/java-tutorial/',
- 'only_matching': True,
- }, {
- 'url': 'https://wipro.udemy.com/java-tutorial/',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url)
-
- def _real_extract(self, url):
- course_path = self._match_id(url)
-
- webpage = self._download_webpage(url, course_path)
-
- course_id, title = self._extract_course_info(webpage, course_path)
-
- self._enroll_course(url, webpage, course_id)
-
- response = self._download_json(
- 'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
- course_id, 'Downloading course curriculum', query={
- 'fields[chapter]': 'title,object_index',
- 'fields[lecture]': 'title,asset',
- 'page_size': '1000',
- })
-
- entries = []
- chapter, chapter_number = [None] * 2
- for entry in response['results']:
- clazz = entry.get('_class')
- if clazz == 'lecture':
- asset = entry.get('asset')
- if isinstance(asset, dict):
- asset_type = asset.get('asset_type') or asset.get('assetType')
- if asset_type != 'Video':
- continue
- lecture_id = entry.get('id')
- if lecture_id:
- entry = {
- '_type': 'url_transparent',
- 'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']),
- 'title': entry.get('title'),
- 'ie_key': UdemyIE.ie_key(),
- }
- if chapter_number:
- entry['chapter_number'] = chapter_number
- if chapter:
- entry['chapter'] = chapter
- entries.append(entry)
- elif clazz == 'chapter':
- chapter_number = entry.get('object_index')
- chapter = entry.get('title')
-
- return self.playlist_result(entries, course_id, title)
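
The curriculum walk at the end of UdemyCourseIE._real_extract relies on chapters preceding their lectures in the flat listing. A self-contained sketch of that grouping, with the asset-type filtering omitted and made-up sample data:

def group_lectures(results):
    entries = []
    chapter = chapter_number = None
    for item in results:
        clazz = item.get('_class')
        if clazz == 'chapter':
            # Remember the most recent chapter for subsequent lectures.
            chapter = item.get('title')
            chapter_number = item.get('object_index')
        elif clazz == 'lecture' and item.get('id'):
            entries.append({
                'id': item['id'],
                'title': item.get('title'),
                'chapter': chapter,
                'chapter_number': chapter_number,
            })
    return entries

print(group_lectures([
    {'_class': 'chapter', 'title': 'Getting Started', 'object_index': 1},
    {'_class': 'lecture', 'id': 101, 'title': 'Welcome'},
    {'_class': 'lecture', 'id': 102, 'title': 'Setup'},
]))
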
diff --git a/youtube_dl/extractor/ufctv.py b/youtube_dl/extractor/ufctv.py
deleted file mode 100644
index f3eaee6b3..000000000
--- a/youtube_dl/extractor/ufctv.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- parse_duration,
- parse_iso8601,
- urlencode_postdata,
-)
-
-
-class UFCTVIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?ufc\.tv/video/(?P<id>[^/]+)'
- _NETRC_MACHINE = 'ufctv'
- _TEST = {
- 'url': 'https://www.ufc.tv/video/ufc-219-countdown-full-episode',
- 'info_dict': {
- 'id': '34167',
- 'ext': 'mp4',
- 'title': 'UFC 219 Countdown: Full Episode',
- 'description': 'md5:26d4e8bf4665ae5878842d7050c3c646',
- 'timestamp': 1513962360,
- 'upload_date': '20171222',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- }
- }
-
- def _real_initialize(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- code = self._download_json(
- 'https://www.ufc.tv/secure/authenticate',
- None, 'Logging in', data=urlencode_postdata({
- 'username': username,
- 'password': password,
- 'format': 'json',
- })).get('code')
- if code and code != 'loginsuccess':
- raise ExtractorError(code, expected=True)
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
- video_data = self._download_json(url, display_id, query={
- 'format': 'json',
- })
- video_id = str(video_data['id'])
- title = video_data['name']
- m3u8_url = self._download_json(
- 'https://www.ufc.tv/service/publishpoint', video_id, query={
- 'type': 'video',
- 'format': 'json',
- 'id': video_id,
- }, headers={
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A402 Safari/604.1',
- })['path']
- m3u8_url = m3u8_url.replace('_iphone.', '.')
- formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': video_data.get('description'),
- 'duration': parse_duration(video_data.get('runtime')),
- 'timestamp': parse_iso8601(video_data.get('releaseDate')),
- 'formats': formats,
- }
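
The publishpoint lookup in UFCTVIE above can be sketched as a standalone helper. This assumes the third-party requests library; the endpoint, parameters and iOS User-Agent are taken from the extractor, but the service may no longer respond this way.

import requests

IOS_UA = ('Mozilla/5.0 (iPhone; CPU iPhone OS 11_0_1 like Mac OS X) '
          'AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 '
          'Mobile/15A402 Safari/604.1')

def get_master_m3u8_url(video_id):
    path = requests.get(
        'https://www.ufc.tv/service/publishpoint',
        params={'type': 'video', 'format': 'json', 'id': video_id},
        headers={'User-Agent': IOS_UA}).json()['path']
    # The service hands out an iPhone-specific playlist; dropping the
    # '_iphone' suffix points at the generic multi-bitrate variant instead.
    return path.replace('_iphone.', '.')
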
diff --git a/youtube_dl/extractor/uol.py b/youtube_dl/extractor/uol.py
deleted file mode 100644
index 08f0c072e..000000000
--- a/youtube_dl/extractor/uol.py
+++ /dev/null
@@ -1,159 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- clean_html,
- int_or_none,
- parse_duration,
- update_url_query,
- str_or_none,
-)
-
-
-class UOLIE(InfoExtractor):
- IE_NAME = 'uol.com.br'
- _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
- _TESTS = [{
- 'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
- 'md5': '25291da27dc45e0afb5718a8603d3816',
- 'info_dict': {
- 'id': '15951931',
- 'ext': 'mp4',
- 'title': 'Miss simpatia é encontrada morta',
- 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
- }
- }, {
- 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
- 'md5': 'e41a2fb7b7398a3a46b6af37b15c00c9',
- 'info_dict': {
- 'id': '15954259',
- 'ext': 'mp4',
- 'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
- 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
- }
- }, {
- 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
- 'only_matching': True,
- }, {
- 'url': 'http://mais.uol.com.br/view/15954259',
- 'only_matching': True,
- }, {
- 'url': 'http://noticias.band.uol.com.br/brasilurgente/video/2016/08/05/15951931/miss-simpatia-e-encontrada-morta.html',
- 'only_matching': True,
- }, {
- 'url': 'http://videos.band.uol.com.br/programa.asp?e=noticias&pr=brasil-urgente&v=15951931&t=Policia-desmonte-base-do-PCC-na-Cracolandia',
- 'only_matching': True,
- }, {
- 'url': 'http://mais.uol.com.br/view/cphaa0gl2x8r/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
- 'only_matching': True,
- }, {
- 'url': 'http://noticias.uol.com.br//videos/assistir.htm?video=rafaela-silva-inspira-criancas-no-judo-04024D983968D4C95326',
- 'only_matching': True,
- }, {
- 'url': 'http://mais.uol.com.br/view/e0qbgxid79uv/15275470',
- 'only_matching': True,
- }]
-
- _FORMATS = {
- '2': {
- 'width': 640,
- 'height': 360,
- },
- '5': {
- 'width': 1280,
- 'height': 720,
- },
- '6': {
- 'width': 426,
- 'height': 240,
- },
- '7': {
- 'width': 1920,
- 'height': 1080,
- },
- '8': {
- 'width': 192,
- 'height': 144,
- },
- '9': {
- 'width': 568,
- 'height': 320,
- },
- '11': {
- 'width': 640,
- 'height': 360,
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- media_id = None
-
- if video_id.isdigit():
- media_id = video_id
-
- if not media_id:
- embed_page = self._download_webpage(
- 'https://jsuol.com.br/c/tv/uol/embed/?params=[embed,%s]' % video_id,
- video_id, 'Downloading embed page', fatal=False)
- if embed_page:
- media_id = self._search_regex(
- (r'uol\.com\.br/(\d+)', r'mediaId=(\d+)'),
- embed_page, 'media id', default=None)
-
- if not media_id:
- webpage = self._download_webpage(url, video_id)
- media_id = self._search_regex(r'mediaId=(\d+)', webpage, 'media id')
-
- video_data = self._download_json(
- 'http://mais.uol.com.br/apiuol/v3/player/getMedia/%s.json' % media_id,
- media_id)['item']
- title = video_data['title']
-
- query = {
- 'ver': video_data.get('numRevision', 2),
- 'r': 'http://mais.uol.com.br',
- }
- for k in ('token', 'sign'):
- v = video_data.get(k)
- if v:
- query[k] = v
-
- formats = []
- for f in video_data.get('formats', []):
- f_url = f.get('url') or f.get('secureUrl')
- if not f_url:
- continue
- f_url = update_url_query(f_url, query)
- format_id = str_or_none(f.get('id'))
- if format_id == '10':
- formats.extend(self._extract_m3u8_formats(
- f_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- continue
- fmt = {
- 'format_id': format_id,
- 'url': f_url,
- 'source_preference': 1,
- }
- fmt.update(self._FORMATS.get(format_id, {}))
- formats.append(fmt)
- self._sort_formats(formats, ('height', 'width', 'source_preference', 'tbr', 'ext'))
-
- tags = []
- for tag in video_data.get('tags', []):
- tag_description = tag.get('description')
- if not tag_description:
- continue
- tags.append(tag_description)
-
- return {
- 'id': media_id,
- 'title': title,
- 'description': clean_html(video_data.get('desMedia')),
- 'thumbnail': video_data.get('thumbnail'),
- 'duration': int_or_none(video_data.get('durationSeconds')) or parse_duration(video_data.get('duration')),
- 'tags': tags,
- 'formats': formats,
- }
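
UOLIE above fills in video dimensions from its static _FORMATS table because the getMedia API response carries none. A trimmed, self-contained sketch of that merge; the input entry is invented sample data:

FORMATS = {  # trimmed copy of the _FORMATS table above
    '2': {'width': 640, 'height': 360},
    '5': {'width': 1280, 'height': 720},
    '7': {'width': 1920, 'height': 1080},
}

def build_format(entry):
    fmt = {
        'format_id': str(entry['id']),
        'url': entry['url'],
        'source_preference': 1,
    }
    # Unknown format ids simply stay without width/height.
    fmt.update(FORMATS.get(fmt['format_id'], {}))
    return fmt

print(build_format({'id': 5, 'url': 'http://example.com/video.mp4'}))
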
diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py
deleted file mode 100644
index 8fdfd743d..000000000
--- a/youtube_dl/extractor/vice.py
+++ /dev/null
@@ -1,337 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import time
-import hashlib
-import json
-import random
-
-from .adobepass import AdobePassIE
-from .youtube import YoutubeIE
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_str,
-)
-from ..utils import (
- ExtractorError,
- int_or_none,
- parse_age_limit,
- str_or_none,
- try_get,
-)
-
-
-class ViceIE(AdobePassIE):
- IE_NAME = 'vice'
- _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?viceland)\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]+)'
- _TESTS = [{
- 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
- 'info_dict': {
- 'id': '5e647f0125e145c9aef2069412c0cbde',
- 'ext': 'mp4',
- 'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
- 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
- 'uploader': 'vice',
- 'uploader_id': '57a204088cb727dec794c67b',
- 'timestamp': 1489664942,
- 'upload_date': '20170316',
- 'age_limit': 14,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'add_ie': ['UplynkPreplay'],
- }, {
- # geo restricted to US
- 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
- 'info_dict': {
- 'id': '930c0ad1f47141cc955087eecaddb0e2',
- 'ext': 'mp4',
- 'uploader': 'waypoint',
- 'title': 'The Signal From Tölva',
- 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
- 'uploader_id': '57f7d621e05ca860fa9ccaf9',
- 'timestamp': 1477941983,
- 'upload_date': '20161031',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- 'add_ie': ['UplynkPreplay'],
- }, {
- 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
- 'info_dict': {
- 'id': '581b12b60a0e1f4c0fb6ea2f',
- 'ext': 'mp4',
- 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
- 'description': '<p>Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.</p>',
- 'uploader': 'VICE',
- 'uploader_id': '57a204088cb727dec794c67b',
- 'timestamp': 1485368119,
- 'upload_date': '20170125',
- 'age_limit': 14,
- },
- 'params': {
- # AES-encrypted m3u8
- 'skip_download': True,
- 'proxy': '127.0.0.1:8118',
- },
- 'add_ie': ['UplynkPreplay'],
- }, {
- 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
- 'only_matching': True,
- }, {
- 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
- 'only_matching': True,
- }, {
- 'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
- 'only_matching': True,
- }, {
- 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]+)',
- webpage)
-
- @staticmethod
- def _extract_url(webpage):
- urls = ViceIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- def _real_extract(self, url):
- locale, video_id = re.match(self._VALID_URL, url).groups()
-
- webpage = self._download_webpage(
- 'https://video.vice.com/%s/embed/%s' % (locale, video_id),
- video_id)
-
- video = self._parse_json(
- self._search_regex(
- r'PREFETCH_DATA\s*=\s*({.+?})\s*;\s*\n', webpage,
- 'app state'), video_id)['video']
- video_id = video.get('vms_id') or video.get('id') or video_id
- title = video['title']
- is_locked = video.get('locked')
- rating = video.get('rating')
- thumbnail = video.get('thumbnail_url')
- duration = int_or_none(video.get('duration'))
- series = try_get(
- video, lambda x: x['episode']['season']['show']['title'],
- compat_str)
- episode_number = try_get(
- video, lambda x: x['episode']['episode_number'])
- season_number = try_get(
- video, lambda x: x['episode']['season']['season_number'])
- uploader = None
-
- query = {}
- if is_locked:
- resource = self._get_mvpd_resource(
- 'VICELAND', title, video_id, rating)
- query['tvetoken'] = self._extract_mvpd_auth(
- url, video_id, 'VICELAND', resource)
-
- # The signature generation algorithm is reverse engineered from signatureGenerator in
- # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
- # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
- # The new JS is located at https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
- exp = int(time.time()) + 1440
-
- query.update({
- 'exp': exp,
- 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
- '_ad_blocked': None,
- '_ad_unit': '',
- '_debug': '',
- 'platform': 'desktop',
- 'rn': random.randint(10000, 100000),
- 'fbprebidtoken': '',
- })
-
- try:
- preplay = self._download_json(
- 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
- video_id, query=query)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
- error = json.loads(e.cause.read().decode())
- error_message = error.get('error_description') or error['details']
- raise ExtractorError('%s said: %s' % (
- self.IE_NAME, error_message), expected=True)
- raise
-
- video_data = preplay['video']
- base = video_data['base']
- uplynk_preplay_url = preplay['preplayURL']
- episode = video_data.get('episode', {})
- channel = video_data.get('channel', {})
-
- subtitles = {}
- cc_url = preplay.get('ccURL')
- if cc_url:
- subtitles['en'] = [{
- 'url': cc_url,
- }]
-
- return {
- '_type': 'url_transparent',
- 'url': uplynk_preplay_url,
- 'id': video_id,
- 'title': title,
- 'description': base.get('body') or base.get('display_body'),
- 'thumbnail': thumbnail,
- 'duration': int_or_none(video_data.get('video_duration')) or duration,
- 'timestamp': int_or_none(video_data.get('created_at'), 1000),
- 'age_limit': parse_age_limit(video_data.get('video_rating')),
- 'series': video_data.get('show_title') or series,
- 'episode_number': int_or_none(episode.get('episode_number') or episode_number),
- 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
- 'season_number': int_or_none(season_number),
- 'season_id': str_or_none(episode.get('season_id')),
- 'uploader': channel.get('base', {}).get('title') or channel.get('name') or uploader,
- 'uploader_id': str_or_none(channel.get('id')),
- 'subtitles': subtitles,
- 'ie_key': 'UplynkPreplay',
- }
-
-
-class ViceShowIE(InfoExtractor):
- IE_NAME = 'vice:show'
- _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
-
- _TEST = {
- 'url': 'https://munchies.vice.com/en/show/fuck-thats-delicious-2',
- 'info_dict': {
- 'id': 'fuck-thats-delicious-2',
- 'title': "Fuck, That's Delicious",
- 'description': 'Follow the culinary adventures of rapper Action Bronson during his ongoing world tour.',
- },
- 'playlist_count': 17,
- }
-
- def _real_extract(self, url):
- show_id = self._match_id(url)
- webpage = self._download_webpage(url, show_id)
-
- entries = [
- self.url_result(video_url, ViceIE.ie_key())
- for video_url, _ in re.findall(
- r'<h2[^>]+class="article-title"[^>]+data-id="\d+"[^>]*>\s*<a[^>]+href="(%s.*?)"'
- % ViceIE._VALID_URL, webpage)]
-
- title = self._search_regex(
- r'<title>(.+?)</title>', webpage, 'title', default=None)
- if title:
- title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
- description = self._html_search_meta(
- 'description', webpage, 'description')
-
- return self.playlist_result(entries, show_id, title, description)
-
-
-class ViceArticleIE(InfoExtractor):
- IE_NAME = 'vice:article'
- _VALID_URL = r'https://www\.vice\.com/[^/]+/article/(?P<id>[^?#]+)'
-
- _TESTS = [{
- 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
- 'info_dict': {
- 'id': '41eae2a47b174a1398357cec55f1f6fc',
- 'ext': 'mp4',
- 'title': 'Mormon War on Porn ',
- 'description': 'md5:6394a8398506581d0346b9ab89093fef',
- 'uploader': 'vice',
- 'uploader_id': '57a204088cb727dec794c67b',
- 'timestamp': 1491883129,
- 'upload_date': '20170411',
- 'age_limit': 17,
- },
- 'params': {
- # AES-encrypted m3u8
- 'skip_download': True,
- },
- 'add_ie': ['UplynkPreplay'],
- }, {
- 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
- 'md5': '7fe8ebc4fa3323efafc127b82bd821d9',
- 'info_dict': {
- 'id': '3jstaBeXgAs',
- 'ext': 'mp4',
- 'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
- 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
- 'uploader': 'Motherboard',
- 'uploader_id': 'MotherboardTV',
- 'upload_date': '20140529',
- },
- 'add_ie': ['Youtube'],
- }, {
- 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
- 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
- 'info_dict': {
- 'id': 'e2ed435eb67e43efb66e6ef9a6930a88',
- 'ext': 'mp4',
- 'title': "Making The World's First Male Sex Doll",
- 'description': 'md5:916078ef0e032d76343116208b6cc2c4',
- 'uploader': 'vice',
- 'uploader_id': '57a204088cb727dec794c67b',
- 'timestamp': 1476919911,
- 'upload_date': '20161019',
- 'age_limit': 17,
- },
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': [ViceIE.ie_key()],
- }, {
- 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
- 'only_matching': True,
- }, {
- 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- webpage = self._download_webpage(url, display_id)
-
- prefetch_data = self._parse_json(self._search_regex(
- r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
- webpage, 'app state'), display_id)['pageData']
- body = prefetch_data['body']
-
- def _url_res(video_url, ie_key):
- return {
- '_type': 'url_transparent',
- 'url': video_url,
- 'display_id': display_id,
- 'ie_key': ie_key,
- }
-
- vice_url = ViceIE._extract_url(webpage)
- if vice_url:
- return _url_res(vice_url, ViceIE.ie_key())
-
- embed_code = self._search_regex(
- r'embedCode=([^&\'"]+)', body,
- 'ooyala embed code', default=None)
- if embed_code:
- return _url_res('ooyala:%s' % embed_code, 'Ooyala')
-
- youtube_url = YoutubeIE._extract_url(body)
- if youtube_url:
- return _url_res(youtube_url, YoutubeIE.ie_key())
-
- video_url = self._html_search_regex(
- r'data-video-url="([^"]+)"',
- prefetch_data['embed_code'], 'video URL')
-
- return _url_res(video_url, ViceIE.ie_key())
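# A minimal sketch of the __APP_STATE scrape in ViceArticleIE above; the
# page snippet is invented and only the assignment shape matches the regex.
import json
import re

webpage = 'window.__APP_STATE = {"pageData": {"body": "", "embed_code": ""}};\n'
app_state = json.loads(re.search(
    r'__APP_STATE\s*=\s*({.+?})(?:\s*\|\|\s*{}\s*)?;\s*\n',
    webpage).group(1))
print(sorted(app_state['pageData'].keys()))
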
diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py
deleted file mode 100644
index d0e34c819..000000000
--- a/youtube_dl/extractor/videa.py
+++ /dev/null
@@ -1,106 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- mimetype2ext,
- parse_codecs,
- xpath_element,
- xpath_text,
-)
-
-
-class VideaIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- https?://
- videa(?:kid)?\.hu/
- (?:
- videok/(?:[^/]+/)*[^?#&]+-|
- player\?.*?\bv=|
- player/v/
- )
- (?P<id>[^?#&]+)
- '''
- _TESTS = [{
- 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
- 'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
- 'info_dict': {
- 'id': '8YfIAjxwWGwT8HVQ',
- 'ext': 'mp4',
- 'title': 'Az őrült kígyász 285 kígyót enged szabadon',
- 'thumbnail': r're:^https?://.*',
- 'duration': 21,
- },
- }, {
- 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
- 'only_matching': True,
- }, {
- 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
- 'only_matching': True,
- }, {
- 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
- 'only_matching': True,
- }, {
- 'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
- 'only_matching': True,
- }, {
- 'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
- 'only_matching': True,
- }, {
- 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
- webpage)]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- info = self._download_xml(
- 'http://videa.hu/videaplayer_get_xml.php', video_id,
- query={'v': video_id})
-
- video = xpath_element(info, './/video', 'video', fatal=True)
- sources = xpath_element(info, './/video_sources', 'sources', fatal=True)
-
- title = xpath_text(video, './title', fatal=True)
-
- formats = []
- for source in sources.findall('./video_source'):
- source_url = source.text
- if not source_url:
- continue
- f = parse_codecs(source.get('codecs'))
- f.update({
- 'url': source_url,
- 'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
- 'format_id': source.get('name'),
- 'width': int_or_none(source.get('width')),
- 'height': int_or_none(source.get('height')),
- })
- formats.append(f)
- self._sort_formats(formats)
-
- thumbnail = xpath_text(video, './poster_src')
- duration = int_or_none(xpath_text(video, './duration'))
-
- age_limit = None
- is_adult = xpath_text(video, './is_adult_content', default=None)
- if is_adult:
- age_limit = 18 if is_adult == '1' else 0
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'age_limit': age_limit,
- 'formats': formats,
- }
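# A sketch of the XML shape VideaIE consumes above; the sample document is
# invented, only the element names match what the extractor reads.
import xml.etree.ElementTree as ET

sample = '''<videaplayer>
  <video>
    <title>Sample title</title>
    <poster_src>http://example.com/poster.jpg</poster_src>
    <duration>21</duration>
  </video>
  <video_sources>
    <video_source name="360p" width="640" height="360" mimetype="video/mp4">http://example.com/v360.mp4</video_source>
  </video_sources>
</videaplayer>'''

info = ET.fromstring(sample)
video = info.find('.//video')
formats = [{
    'url': source.text,
    'format_id': source.get('name'),
    'width': int(source.get('width')),
    'height': int(source.get('height')),
} for source in info.findall('.//video_sources/video_source')]
print(video.findtext('title'), formats)
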
diff --git a/youtube_dl/extractor/videodetective.py b/youtube_dl/extractor/videodetective.py
deleted file mode 100644
index a19411a05..000000000
--- a/youtube_dl/extractor/videodetective.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from .internetvideoarchive import InternetVideoArchiveIE
-
-
-class VideoDetectiveIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
-
- _TEST = {
- 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
- 'info_dict': {
- 'id': '194487',
- 'ext': 'mp4',
- 'title': 'KICK-ASS 2',
- 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- og_video = self._og_search_video_url(webpage)
- query = compat_urlparse.urlparse(og_video).query
- return self.url_result(InternetVideoArchiveIE._build_json_url(query), ie=InternetVideoArchiveIE.ie_key())
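# A sketch of the handoff above: VideoDetectiveIE only pulls the query
# string out of the og:video URL and forwards it to the
# InternetVideoArchive extractor. The URL below is invented.
try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse  # Python 2, as the compat layer does

og_video = 'http://video.internetvideoarchive.net/player.aspx?customerid=12345&publishedid=194487'
print(urlparse(og_video).query)  # customerid=12345&publishedid=194487
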
diff --git a/youtube_dl/extractor/videopremium.py b/youtube_dl/extractor/videopremium.py
deleted file mode 100644
index cf690d7b0..000000000
--- a/youtube_dl/extractor/videopremium.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-import random
-
-from .common import InfoExtractor
-
-
-class VideoPremiumIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?videopremium\.(?:tv|me)/(?P<id>\w+)(?:/.*)?'
- _TEST = {
- 'url': 'http://videopremium.tv/4w7oadjsf156',
- 'info_dict': {
- 'id': '4w7oadjsf156',
- 'ext': 'f4v',
- 'title': 'youtube-dl_test_video____a_________-BaW_jenozKc.mp4.mp4'
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'Test file has been deleted.',
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage_url = 'http://videopremium.tv/' + video_id
- webpage = self._download_webpage(webpage_url, video_id)
-
- if re.match(r'^<html><head><script[^>]*>window\.location\s*=', webpage):
- # Download again, we need a cookie
- webpage = self._download_webpage(
- webpage_url, video_id,
- note='Downloading webpage again (with cookie)')
-
- video_title = self._html_search_regex(
- r'<h2(?:.*?)>\s*(.+?)\s*<', webpage, 'video title')
-
- return {
- 'id': video_id,
- 'url': 'rtmp://e%d.md.iplay.md/play' % random.randint(1, 16),
- 'play_path': 'mp4:%s.f4v' % video_id,
- 'page_url': 'http://videopremium.tv/' + video_id,
- 'player_url': 'http://videopremium.tv/uplayer/uppod.swf',
- 'ext': 'f4v',
- 'title': video_title,
- }
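# A sketch of the RTMP URL construction above: the extractor picks a random
# edge server e1..e16 and plays "mp4:<id>.f4v" from it. The id below is a
# placeholder.
import random

video_id = '4w7oadjsf156'
print('rtmp://e%d.md.iplay.md/play' % random.randint(1, 16), 'mp4:%s.f4v' % video_id)
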
diff --git a/youtube_dl/extractor/vidzi.py b/youtube_dl/extractor/vidzi.py
deleted file mode 100644
index 42ea4952c..000000000
--- a/youtube_dl/extractor/vidzi.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- decode_packed_codes,
- js_to_json,
- NO_DEFAULT,
- PACKED_CODES_RE,
-)
-
-
-class VidziIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
- _TESTS = [{
- 'url': 'http://vidzi.tv/cghql9yq6emu.html',
- 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
- 'info_dict': {
- 'id': 'cghql9yq6emu',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video 1\\\\2\'3/4<5\\\\6ä7↭',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
- 'only_matching': True,
- }, {
- 'url': 'http://vidzi.cc/cghql9yq6emu.html',
- 'only_matching': True,
- }, {
- 'url': 'https://vidzi.si/rph9gztxj1et.html',
- 'only_matching': True,
- }, {
- 'url': 'http://vidzi.nu/cghql9yq6emu.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'http://vidzi.tv/%s' % video_id, video_id)
- title = self._html_search_regex(
- r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
-
- codes = [webpage]
- codes.extend([
- decode_packed_codes(mobj.group(0)).replace('\\\'', '\'')
- for mobj in re.finditer(PACKED_CODES_RE, webpage)])
- for num, code in enumerate(codes, 1):
- jwplayer_data = self._parse_json(
- self._search_regex(
- r'setup\(([^)]+)\)', code, 'jwplayer data',
- default=NO_DEFAULT if num == len(codes) else '{}'),
- video_id, transform_source=lambda s: js_to_json(
- re.sub(r'\s*\+\s*window\[.+?\]', '', s)))
- if jwplayer_data:
- break
-
- info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
- info_dict['title'] = title
-
- return info_dict
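# A sketch of the candidate-scan loop in VidziIE above, assuming youtube-dl's
# utils are importable: every P.A.C.K.E.R.-packed script in the page is
# decoded and added as another body to search for the jwplayer setup(...)
# call, with the raw page tried first.
import re

from youtube_dl.utils import decode_packed_codes, PACKED_CODES_RE

def candidate_bodies(webpage):
    codes = [webpage]
    for mobj in re.finditer(PACKED_CODES_RE, webpage):
        codes.append(decode_packed_codes(mobj.group(0)).replace("\\'", "'"))
    return codes
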
diff --git a/youtube_dl/extractor/viewlift.py b/youtube_dl/extractor/viewlift.py
deleted file mode 100644
index 851ad936c..000000000
--- a/youtube_dl/extractor/viewlift.py
+++ /dev/null
@@ -1,302 +0,0 @@
-from __future__ import unicode_literals
-
-import base64
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urllib_parse_unquote
-from ..utils import (
- ExtractorError,
- clean_html,
- determine_ext,
- int_or_none,
- js_to_json,
- parse_age_limit,
- parse_duration,
- try_get,
-)
-
-
-class ViewLiftBaseIE(InfoExtractor):
- _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm)\.com|hoichoi\.tv'
-
-
-class ViewLiftEmbedIE(ViewLiftBaseIE):
- _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?:%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
- _TESTS = [{
- 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
- 'md5': '2924e9215c6eff7a55ed35b72276bd93',
- 'info_dict': {
- 'id': '74849a00-85a9-11e1-9660-123139220831',
- 'ext': 'mp4',
- 'title': '#whilewewatch',
- }
- }, {
- # invalid labels, 360p is better than 480p
- 'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
- 'md5': '882fca19b9eb27ef865efeeaed376a48',
- 'info_dict': {
- 'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
- 'ext': 'mp4',
- 'title': 'Life in Limbo',
- }
- }, {
- 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_url(webpage):
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
- webpage)
- if mobj:
- return mobj.group('url')
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- if '>This film is not playable in your area.<' in webpage:
- raise ExtractorError(
- 'Film %s is not playable in your area.' % video_id, expected=True)
-
- formats = []
- has_bitrate = False
- sources = self._parse_json(self._search_regex(
- r'(?s)sources:\s*(\[.+?\]),', webpage,
- 'sources', default='[]'), video_id, js_to_json)
- for source in sources:
- file_ = source.get('file')
- if not file_:
- continue
- type_ = source.get('type')
- ext = determine_ext(file_)
- format_id = source.get('label') or ext
- if all(v in ('m3u8', 'hls') for v in (type_, ext)):
- formats.extend(self._extract_m3u8_formats(
- file_, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- bitrate = int_or_none(self._search_regex(
- [r'(\d+)kbps', r'_\d{1,2}x\d{1,2}_(\d{3,})\.%s' % ext],
- file_, 'bitrate', default=None))
- if not has_bitrate and bitrate:
- has_bitrate = True
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]$', format_id, 'height', default=None))
- formats.append({
- 'url': file_,
- 'format_id': 'http-%s%s' % (format_id, ('-%dk' % bitrate if bitrate else '')),
- 'tbr': bitrate,
- 'height': height,
- })
- if not formats:
- hls_url = self._parse_json(self._search_regex(
- r'filmInfo\.src\s*=\s*({.+?});',
- webpage, 'src'), video_id, js_to_json)['src']
- formats = self._extract_m3u8_formats(
- hls_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False)
- field_preference = None if has_bitrate else ('height', 'tbr', 'format_id')
- self._sort_formats(formats, field_preference)
-
- title = self._search_regex(
- [r"title\s*:\s*'([^']+)'", r'<title>([^<]+)</title>'],
- webpage, 'title')
-
- return {
- 'id': video_id,
- 'title': title,
- 'formats': formats,
- }
-
-
-class ViewLiftIE(ViewLiftBaseIE):
- _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?:/(?:films/title|show|(?:news/)?videos?))?/(?P<id>[^?#]+)' % ViewLiftBaseIE._DOMAINS_REGEX
- _TESTS = [{
- 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
- 'md5': '19844f897b35af219773fd63bdec2942',
- 'info_dict': {
- 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
- 'display_id': 'lost_for_life',
- 'ext': 'mp4',
- 'title': 'Lost for Life',
- 'description': 'md5:ea10b5a50405ae1f7b5269a6ec594102',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 4489,
- 'categories': 'mincount:3',
- 'age_limit': 14,
- 'upload_date': '20150421',
- 'timestamp': 1429656820,
- }
- }, {
- 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
- 'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
- 'info_dict': {
- 'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
- 'display_id': 'the_world_cut_project/india',
- 'ext': 'mp4',
- 'title': 'India',
- 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 979,
- 'timestamp': 1399478279,
- 'upload_date': '20140507',
- }
- }, {
- 'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love',
- 'info_dict': {
- 'id': '00000148-7b53-de26-a9fb-fbf306f70020',
- 'display_id': 'augie_alone/s_2_ep_12_love',
- 'ext': 'mp4',
- 'title': 'Augie, Alone:S. 2 Ep. 12 - Love',
- 'description': 'md5:db2a5c72d994f16a780c1eb353a8f403',
- 'thumbnail': r're:^https?://.*\.jpg',
- 'duration': 107,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://main.snagfilms.com/films/title/the_freebie',
- 'only_matching': True,
- }, {
- # Film is not playable in your area.
- 'url': 'http://www.snagfilms.com/films/title/inside_mecca',
- 'only_matching': True,
- }, {
- # Film is not available.
- 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
- 'only_matching': True,
- }, {
- 'url': 'http://www.winnersview.com/videos/the-good-son',
- 'only_matching': True,
- }, {
- # Was once Kaltura embed
- 'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
-
- def _real_extract(self, url):
- domain, display_id = re.match(self._VALID_URL, url).groups()
-
- webpage = self._download_webpage(url, display_id)
-
- if ">Sorry, the Film you're looking for is not available.<" in webpage:
- raise ExtractorError(
- 'Film %s is not available.' % display_id, expected=True)
-
- initial_store_state = self._search_regex(
- r"window\.initialStoreState\s*=.*?JSON\.parse\(unescape\(atob\('([^']+)'\)\)\)",
- webpage, 'Initial Store State', default=None)
- if initial_store_state:
- modules = self._parse_json(compat_urllib_parse_unquote(base64.b64decode(
- initial_store_state).decode()), display_id)['page']['data']['modules']
- content_data = next(m['contentData'][0] for m in modules if m.get('moduleType') == 'VideoDetailModule')
- gist = content_data['gist']
- film_id = gist['id']
- title = gist['title']
- video_assets = try_get(
- content_data, lambda x: x['streamingInfo']['videoAssets'], dict)
- if not video_assets:
- token = self._download_json(
- 'https://prod-api.viewlift.com/identity/anonymous-token',
- film_id, 'Downloading authorization token',
- query={'site': 'snagfilms'})['authorizationToken']
- video_assets = self._download_json(
- 'https://prod-api.viewlift.com/entitlement/video/status',
- film_id, headers={
- 'Authorization': token,
- 'Referer': url,
- }, query={
- 'id': film_id
- })['video']['streamingInfo']['videoAssets']
-
- formats = []
- mpeg_video_assets = video_assets.get('mpeg') or []
- for video_asset in mpeg_video_assets:
- video_asset_url = video_asset.get('url')
- if not video_asset_url:
- continue
- bitrate = int_or_none(video_asset.get('bitrate'))
- height = int_or_none(self._search_regex(
- r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
- 'height', default=None))
- formats.append({
- 'url': video_asset_url,
- 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
- 'tbr': bitrate,
- 'height': height,
- 'vcodec': video_asset.get('codec'),
- })
-
- hls_url = video_assets.get('hls')
- if hls_url:
- formats.extend(self._extract_m3u8_formats(
- hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats, ('height', 'tbr', 'format_id'))
-
- info = {
- 'id': film_id,
- 'display_id': display_id,
- 'title': title,
- 'description': gist.get('description'),
- 'thumbnail': gist.get('videoImageUrl'),
- 'duration': int_or_none(gist.get('runtime')),
- 'age_limit': parse_age_limit(content_data.get('parentalRating')),
- 'timestamp': int_or_none(gist.get('publishDate'), 1000),
- 'formats': formats,
- }
- for k in ('categories', 'tags'):
- info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
- return info
- else:
- film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
-
- snag = self._parse_json(
- self._search_regex(
- r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag', default='[]'),
- display_id)
-
- for item in snag:
- if item.get('data', {}).get('film', {}).get('id') == film_id:
- data = item['data']['film']
- title = data['title']
- description = clean_html(data.get('synopsis'))
- thumbnail = data.get('image')
- duration = int_or_none(data.get('duration') or data.get('runtime'))
- categories = [
- category['title'] for category in data.get('categories', [])
- if category.get('title')]
- break
- else:
- title = self._html_search_regex(
- (r'itemprop="title">([^<]+)<',
- r'(?s)itemprop="title">(.+?)<div'), webpage, 'title')
- description = self._html_search_regex(
- r'(?s)<div itemprop="description" class="film-synopsis-inner ">(.+?)</div>',
- webpage, 'description', default=None) or self._og_search_description(webpage)
- thumbnail = self._og_search_thumbnail(webpage)
- duration = parse_duration(self._search_regex(
- r'<span itemprop="duration" class="film-duration strong">([^<]+)<',
- webpage, 'duration', fatal=False))
- categories = re.findall(r'<a href="/movies/[^"]+">([^<]+)</a>', webpage)
-
- return {
- '_type': 'url_transparent',
- 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
- 'id': film_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'categories': categories,
- 'ie_key': 'ViewLiftEmbed',
- }
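# A standalone sketch of the window.initialStoreState decoding in ViewLiftIE
# above: the page embeds atob('<base64>') whose payload is URL-escaped JSON.
# The sample state below is invented to keep the snippet self-contained.
import base64
import json

try:
    from urllib.parse import unquote
except ImportError:
    from urllib import unquote  # Python 2

state = {'page': {'data': {'modules': [{
    'moduleType': 'VideoDetailModule',
    'contentData': [{'gist': {'id': '0000', 'title': 'Sample film'}}],
}]}}}
blob = base64.b64encode(json.dumps(state).encode()).decode()

modules = json.loads(unquote(
    base64.b64decode(blob).decode()))['page']['data']['modules']
content_data = next(m['contentData'][0] for m in modules
                    if m.get('moduleType') == 'VideoDetailModule')
print(content_data['gist']['title'])
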
diff --git a/youtube_dl/extractor/viewster.py b/youtube_dl/extractor/viewster.py
deleted file mode 100644
index 6e318479c..000000000
--- a/youtube_dl/extractor/viewster.py
+++ /dev/null
@@ -1,217 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_HTTPError,
- compat_urllib_parse_unquote,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
- int_or_none,
- parse_iso8601,
- sanitized_Request,
- HEADRequest,
- url_basename,
-)
-
-
-class ViewsterIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
- _TESTS = [{
- # movie, Type=Movie
- 'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
- 'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
- 'info_dict': {
- 'id': '1140-11855-000',
- 'ext': 'mp4',
- 'title': 'The listening Project',
- 'description': 'md5:bac720244afd1a8ea279864e67baa071',
- 'timestamp': 1214870400,
- 'upload_date': '20080701',
- 'duration': 4680,
- },
- }, {
- # series episode, Type=Episode
- 'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
- 'md5': '9243079a8531809efe1b089db102c069',
- 'info_dict': {
- 'id': '1284-19427-001',
- 'ext': 'mp4',
- 'title': 'The World and a Wall',
- 'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
- 'timestamp': 1428192000,
- 'upload_date': '20150405',
- 'duration': 1500,
- },
- }, {
- # serie, Type=Serie
- 'url': 'http://www.viewster.com/serie/1303-19426-000/',
- 'info_dict': {
- 'id': '1303-19426-000',
- 'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
- 'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
- },
- 'playlist_count': 13,
- }, {
- # unfinished serie, no Type
- 'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
- 'info_dict': {
- 'id': '1284-19427-000',
- 'title': 'Baby Steps—Season 2',
- 'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
- },
- 'playlist_mincount': 16,
- }, {
- # geo restricted series
- 'url': 'https://www.viewster.com/serie/1280-18794-002/',
- 'only_matching': True,
- }, {
- # geo restricted video
- 'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
- 'only_matching': True,
- }]
-
- _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
-
- def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
- request = sanitized_Request(url)
- request.add_header('Accept', self._ACCEPT_HEADER)
- request.add_header('Auth-token', self._AUTH_TOKEN)
- return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- # Get 'api_token' cookie
- self._request_webpage(
- HEADRequest('http://www.viewster.com/'),
- video_id, headers=self.geo_verification_headers())
- cookies = self._get_cookies('http://www.viewster.com/')
- self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
-
- info = self._download_json(
- 'https://public-api.viewster.com/search/%s' % video_id,
- video_id, 'Downloading entry JSON')
-
- entry_id = info.get('Id') or info['id']
-
- # an unfinished serie has no Type
- if info.get('Type') in ('Serie', None):
- try:
- episodes = self._download_json(
- 'https://public-api.viewster.com/series/%s/episodes' % entry_id,
- video_id, 'Downloading series JSON')
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
- self.raise_geo_restricted()
- else:
- raise
- entries = [
- self.url_result(
- 'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
- for episode in episodes]
- title = (info.get('Title') or info['Synopsis']['Title']).strip()
- description = info.get('Synopsis', {}).get('Detailed')
- return self.playlist_result(entries, video_id, title, description)
-
- formats = []
- for language_set in info.get('LanguageSets', []):
- manifest_url = None
- m3u8_formats = []
- audio = language_set.get('Audio') or ''
- subtitle = language_set.get('Subtitle') or ''
- base_format_id = audio
- if subtitle:
- base_format_id += '-%s' % subtitle
-
- def concat(suffix, sep='-'):
- return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
-
- medias = self._download_json(
- 'https://public-api.viewster.com/movies/%s/videos' % entry_id,
- video_id, fatal=False, query={
- 'mediaTypes': ['application/f4m+xml', 'application/x-mpegURL', 'video/mp4'],
- 'language': audio,
- 'subtitle': subtitle,
- })
- if not medias:
- continue
- for media in medias:
- video_url = media.get('Uri')
- if not video_url:
- continue
- ext = determine_ext(video_url)
- if ext == 'f4m':
- manifest_url = video_url
- video_url += '&' if '?' in video_url else '?'
- video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
- formats.extend(self._extract_f4m_formats(
- video_url, video_id, f4m_id=concat('hds')))
- elif ext == 'm3u8':
- manifest_url = video_url
- m3u8_formats = self._extract_m3u8_formats(
- video_url, video_id, 'mp4', m3u8_id=concat('hls'),
- fatal=False) # m3u8 sometimes fails
- if m3u8_formats:
- formats.extend(m3u8_formats)
- else:
- qualities_basename = self._search_regex(
- r'/([^/]+)\.csmil/',
- manifest_url, 'qualities basename', default=None)
- if not qualities_basename:
- continue
- QUALITIES_RE = r'((,\d+k)+,?)'
- qualities = self._search_regex(
- QUALITIES_RE, qualities_basename,
- 'qualities', default=None)
- if not qualities:
- continue
- qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
- qualities.sort()
- http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
- http_url_basename = url_basename(video_url)
- if m3u8_formats:
- self._sort_formats(m3u8_formats)
- m3u8_formats = list(filter(
- lambda f: f.get('vcodec') != 'none', m3u8_formats))
- if len(qualities) == len(m3u8_formats):
- for q, m3u8_format in zip(qualities, m3u8_formats):
- f = m3u8_format.copy()
- f.update({
- 'url': video_url.replace(http_url_basename, http_template % q),
- 'format_id': f['format_id'].replace('hls', 'http'),
- 'protocol': 'http',
- })
- formats.append(f)
- else:
- for q in qualities:
- formats.append({
- 'url': video_url.replace(http_url_basename, http_template % q),
- 'ext': 'mp4',
- 'format_id': 'http-%d' % q,
- 'tbr': q,
- })
-
- if not formats and not info.get('VODSettings'):
- self.raise_geo_restricted()
-
- self._sort_formats(formats)
-
- synopsis = info.get('Synopsis') or {}
- # Prefer title outside synopsis since it's less messy
- title = (info.get('Title') or synopsis['Title']).strip()
- description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
- duration = int_or_none(info.get('Duration'))
- timestamp = parse_iso8601(info.get('ReleaseDate'))
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'timestamp': timestamp,
- 'duration': duration,
- 'formats': formats,
- }
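# A sketch of the HTTP-variant derivation in ViewsterIE above: the csmil
# basename in the manifest URL carries a ",300k,600k,"-style quality list
# that is turned into a per-bitrate URL template. The basename is invented.
import re

QUALITIES_RE = r'((,\d+k)+,?)'
qualities_basename = 'movie_,300k,600k,1200k,.mp4.csmil'

qualities = re.search(QUALITIES_RE, qualities_basename).group(1)
bitrates = sorted(int(q[:-1]) for q in qualities.strip(',').split(','))
http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
print([http_template % q for q in bitrates])
# ['movie_300k.mp4.csmil', 'movie_600k.mp4.csmil', 'movie_1200k.mp4.csmil']
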
diff --git a/youtube_dl/extractor/viki.py b/youtube_dl/extractor/viki.py
deleted file mode 100644
index b0dcdc0e6..000000000
--- a/youtube_dl/extractor/viki.py
+++ /dev/null
@@ -1,384 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import hashlib
-import hmac
-import itertools
-import json
-import re
-import time
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- parse_age_limit,
- parse_iso8601,
- sanitized_Request,
-)
-
-
-class VikiBaseIE(InfoExtractor):
- _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
- _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
- _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
-
- _APP = '100005a'
- _APP_VERSION = '2.2.5.1428709186'
- _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
-
- _GEO_BYPASS = False
- _NETRC_MACHINE = 'viki'
-
- _token = None
-
- _ERRORS = {
- 'geo': 'Sorry, this content is not available in your region.',
- 'upcoming': 'Sorry, this content is not yet available.',
- # 'paywall': 'paywall',
- }
-
- def _prepare_call(self, path, timestamp=None, post_data=None):
- path += '?' if '?' not in path else '&'
- if not timestamp:
- timestamp = int(time.time())
- query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
- if self._token:
- query += '&token=%s' % self._token
- sig = hmac.new(
- self._APP_SECRET.encode('ascii'),
- query.encode('ascii'),
- hashlib.sha1
- ).hexdigest()
- url = self._API_URL_TEMPLATE % (query, sig)
- return sanitized_Request(
- url, json.dumps(post_data).encode('utf-8')) if post_data else url
-
- def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
- resp = self._download_json(
- self._prepare_call(path, timestamp, post_data), video_id, note)
-
- error = resp.get('error')
- if error:
- if error == 'invalid timestamp':
- resp = self._download_json(
- self._prepare_call(path, int(resp['current_timestamp']), post_data),
- video_id, '%s (retry)' % note)
- error = resp.get('error')
- if error:
- self._raise_error(resp['error'])
-
- return resp
-
- def _raise_error(self, error):
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, error),
- expected=True)
-
- def _check_errors(self, data):
- for reason, status in data.get('blocking', {}).items():
- if status and reason in self._ERRORS:
- message = self._ERRORS[reason]
- if reason == 'geo':
- self.raise_geo_restricted(msg=message)
- raise ExtractorError('%s said: %s' % (
- self.IE_NAME, message), expected=True)
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_form = {
- 'login_id': username,
- 'password': password,
- }
-
- login = self._call_api(
- 'sessions.json', None,
- 'Logging in', post_data=login_form)
-
- self._token = login.get('token')
- if not self._token:
- self.report_warning('Unable to get session token, login has probably failed')
-
- @staticmethod
- def dict_selection(dict_obj, preferred_key, allow_fallback=True):
- if preferred_key in dict_obj:
- return dict_obj.get(preferred_key)
-
- if not allow_fallback:
- return
-
- filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
- return filtered_dict[0] if filtered_dict else None
-
-
-class VikiIE(VikiBaseIE):
- IE_NAME = 'viki'
- _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
- _TESTS = [{
- 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
- 'info_dict': {
- 'id': '1023585v',
- 'ext': 'mp4',
- 'title': 'Heirs Episode 14',
- 'uploader': 'SBS',
- 'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
- 'upload_date': '20131121',
- 'age_limit': 13,
- },
- 'skip': 'Blocked in the US',
- }, {
- # clip
- 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
- 'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
- 'info_dict': {
- 'id': '1067139v',
- 'ext': 'mp4',
- 'title': "'The Avengers: Age of Ultron' Press Conference",
- 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
- 'duration': 352,
- 'timestamp': 1430380829,
- 'upload_date': '20150430',
- 'uploader': 'Arirang TV',
- 'like_count': int,
- 'age_limit': 0,
- }
- }, {
- 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
- 'info_dict': {
- 'id': '1048879v',
- 'ext': 'mp4',
- 'title': 'Ankhon Dekhi',
- 'duration': 6512,
- 'timestamp': 1408532356,
- 'upload_date': '20140820',
- 'uploader': 'Spuul',
- 'like_count': int,
- 'age_limit': 13,
- },
- 'skip': 'Blocked in the US',
- }, {
- # episode
- 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
- 'md5': '5fa476a902e902783ac7a4d615cdbc7a',
- 'info_dict': {
- 'id': '44699v',
- 'ext': 'mp4',
- 'title': 'Boys Over Flowers - Episode 1',
- 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
- 'duration': 4204,
- 'timestamp': 1270496524,
- 'upload_date': '20100405',
- 'uploader': 'group8',
- 'like_count': int,
- 'age_limit': 13,
- }
- }, {
- # youtube external
- 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
- 'md5': '63f8600c1da6f01b7640eee7eca4f1da',
- 'info_dict': {
- 'id': '50562v',
- 'ext': 'webm',
- 'title': 'Poor Nastya [COMPLETE] - Episode 1',
- 'description': '',
- 'duration': 606,
- 'timestamp': 1274949505,
- 'upload_date': '20101213',
- 'uploader': 'ad14065n',
- 'uploader_id': 'ad14065n',
- 'like_count': int,
- 'age_limit': 13,
- }
- }, {
- 'url': 'http://www.viki.com/player/44699v',
- 'only_matching': True,
- }, {
- # non-English description
- 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
- 'md5': '1713ae35df5a521b31f6dc40730e7c9c',
- 'info_dict': {
- 'id': '158036v',
- 'ext': 'mp4',
- 'uploader': 'I Planet Entertainment',
- 'upload_date': '20111122',
- 'timestamp': 1321985454,
- 'description': 'md5:44b1e46619df3a072294645c770cef36',
- 'title': 'Love In Magic',
- 'age_limit': 13,
- },
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- video = self._call_api(
- 'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
-
- self._check_errors(video)
-
- title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
- if not title:
- title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
- container_titles = video.get('container', {}).get('titles', {})
- container_title = self.dict_selection(container_titles, 'en')
- title = '%s - %s' % (container_title, title)
-
- description = self.dict_selection(video.get('descriptions', {}), 'en')
-
- duration = int_or_none(video.get('duration'))
- timestamp = parse_iso8601(video.get('created_at'))
- uploader = video.get('author')
- like_count = int_or_none(video.get('likes', {}).get('count'))
- age_limit = parse_age_limit(video.get('rating'))
-
- thumbnails = []
- for thumbnail_id, thumbnail in video.get('images', {}).items():
- thumbnails.append({
- 'id': thumbnail_id,
- 'url': thumbnail.get('url'),
- })
-
- subtitles = {}
- for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
- subtitles[subtitle_lang] = [{
- 'ext': subtitles_format,
- 'url': self._prepare_call(
- 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
- } for subtitles_format in ('srt', 'vtt')]
-
- result = {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'duration': duration,
- 'timestamp': timestamp,
- 'uploader': uploader,
- 'like_count': like_count,
- 'age_limit': age_limit,
- 'thumbnails': thumbnails,
- 'subtitles': subtitles,
- }
-
- streams = self._call_api(
- 'videos/%s/streams.json' % video_id, video_id,
- 'Downloading video streams JSON')
-
- if 'external' in streams:
- result.update({
- '_type': 'url_transparent',
- 'url': streams['external']['url'],
- })
- return result
-
- formats = []
- for format_id, stream_dict in streams.items():
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]$', format_id, 'height', default=None))
- for protocol, format_dict in stream_dict.items():
- # rtmps URLs do not seem to work
- if protocol == 'rtmps':
- continue
- format_url = format_dict['url']
- if format_id == 'm3u8':
- m3u8_formats = self._extract_m3u8_formats(
- format_url, video_id, 'mp4',
- entry_protocol='m3u8_native',
- m3u8_id='m3u8-%s' % protocol, fatal=False)
- # Despite CODECS metadata in m3u8, all video-only formats
- # are actually video+audio
- for f in m3u8_formats:
- if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
- f['acodec'] = None
- formats.extend(m3u8_formats)
- elif format_url.startswith('rtmp'):
- mobj = re.search(
- r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
- format_url)
- if not mobj:
- continue
- formats.append({
- 'format_id': 'rtmp-%s' % format_id,
- 'ext': 'flv',
- 'url': mobj.group('url'),
- 'play_path': mobj.group('playpath'),
- 'app': mobj.group('app'),
- 'page_url': url,
- })
- else:
- formats.append({
- 'url': format_url,
- 'format_id': '%s-%s' % (format_id, protocol),
- 'height': height,
- })
- self._sort_formats(formats)
-
- result['formats'] = formats
- return result
-
-
-class VikiChannelIE(VikiBaseIE):
- IE_NAME = 'viki:channel'
- _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
- _TESTS = [{
- 'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
- 'info_dict': {
- 'id': '50c',
- 'title': 'Boys Over Flowers',
- 'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
- },
- 'playlist_mincount': 71,
- }, {
- 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
- 'info_dict': {
- 'id': '1354c',
- 'title': 'Poor Nastya [COMPLETE]',
- 'description': 'md5:05bf5471385aa8b21c18ad450e350525',
- },
- 'playlist_count': 127,
- }, {
- 'url': 'http://www.viki.com/news/24569c-showbiz-korea',
- 'only_matching': True,
- }, {
- 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
- 'only_matching': True,
- }, {
- 'url': 'http://www.viki.com/artists/2141c-shinee',
- 'only_matching': True,
- }]
-
- _PER_PAGE = 25
-
- def _real_extract(self, url):
- channel_id = self._match_id(url)
-
- channel = self._call_api(
- 'containers/%s.json' % channel_id, channel_id,
- 'Downloading channel JSON')
-
- self._check_errors(channel)
-
- title = self.dict_selection(channel['titles'], 'en')
-
- description = self.dict_selection(channel['descriptions'], 'en')
-
- entries = []
- for video_type in ('episodes', 'clips', 'movies'):
- for page_num in itertools.count(1):
- page = self._call_api(
- 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
- % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
- 'Downloading %s JSON page #%d' % (video_type, page_num))
- for video in page['response']:
- video_id = video['id']
- entries.append(self.url_result(
- 'https://www.viki.com/videos/%s' % video_id, 'Viki'))
- if not page['pagination']['next']:
- break
-
- return self.playlist_result(entries, channel_id, title, description)
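# A standalone sketch of the API signing in VikiBaseIE._prepare_call above:
# the query path (app id and timestamp included) is HMAC-SHA1 signed with
# the app secret and appended as "sig". The constants are the ones from the
# deleted extractor; the timestamp is fixed for illustration.
import hashlib
import hmac

_APP = '100005a'
_APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'

path = 'videos/1023585v.json?'
timestamp = 1500000000
query = '/v4/%sapp=%s&t=%s&site=www.viki.com' % (path, _APP, timestamp)
sig = hmac.new(_APP_SECRET.encode('ascii'), query.encode('ascii'),
               hashlib.sha1).hexdigest()
print('https://api.viki.io%s&sig=%s' % (query, sig))
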
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
deleted file mode 100644
index 9abd59d98..000000000
--- a/youtube_dl/extractor/vimeo.py
+++ /dev/null
@@ -1,1131 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import base64
-import functools
-import json
-import re
-import itertools
-
-from .common import InfoExtractor
-from ..compat import (
- compat_kwargs,
- compat_HTTPError,
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- determine_ext,
- ExtractorError,
- js_to_json,
- int_or_none,
- merge_dicts,
- NO_DEFAULT,
- OnDemandPagedList,
- parse_filesize,
- RegexNotFoundError,
- sanitized_Request,
- smuggle_url,
- std_headers,
- try_get,
- unified_timestamp,
- unsmuggle_url,
- urlencode_postdata,
- unescapeHTML,
-)
-
-
-class VimeoBaseInfoExtractor(InfoExtractor):
- _NETRC_MACHINE = 'vimeo'
- _LOGIN_REQUIRED = False
- _LOGIN_URL = 'https://vimeo.com/log_in'
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- if self._LOGIN_REQUIRED:
- raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
- return
- webpage = self._download_webpage(
- self._LOGIN_URL, None, 'Downloading login page')
- token, vuid = self._extract_xsrft_and_vuid(webpage)
- data = {
- 'action': 'login',
- 'email': username,
- 'password': password,
- 'service': 'vimeo',
- 'token': token,
- }
- self._set_vimeo_cookie('vuid', vuid)
- try:
- self._download_webpage(
- self._LOGIN_URL, None, 'Logging in',
- data=urlencode_postdata(data), headers={
- 'Content-Type': 'application/x-www-form-urlencoded',
- 'Referer': self._LOGIN_URL,
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418:
- raise ExtractorError(
- 'Unable to log in: bad username or password',
- expected=True)
- raise ExtractorError('Unable to log in')
-
- def _verify_video_password(self, url, video_id, webpage):
- password = self._downloader.params.get('videopassword')
- if password is None:
- raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
- token, vuid = self._extract_xsrft_and_vuid(webpage)
- data = urlencode_postdata({
- 'password': password,
- 'token': token,
- })
- if url.startswith('http://'):
- # vimeo only supports https now, but the user can give an http url
- url = url.replace('http://', 'https://')
- password_request = sanitized_Request(url + '/password', data)
- password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
- password_request.add_header('Referer', url)
- self._set_vimeo_cookie('vuid', vuid)
- return self._download_webpage(
- password_request, video_id,
- 'Verifying the password', 'Wrong password')
-
- def _extract_xsrft_and_vuid(self, webpage):
- xsrft = self._search_regex(
- r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
- webpage, 'login token', group='xsrft')
- vuid = self._search_regex(
- r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
- webpage, 'vuid', group='vuid')
- return xsrft, vuid
-
- def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
- vimeo_config = self._search_regex(
- r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
- webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
- if vimeo_config:
- return self._parse_json(vimeo_config, video_id)
-
- def _set_vimeo_cookie(self, name, value):
- self._set_cookie('vimeo.com', name, value)
-
- def _vimeo_sort_formats(self, formats):
- # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
- # at the same time without actual units specified. This leads to wrong sorting.
- self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
-
- def _parse_config(self, config, video_id):
- video_data = config['video']
- video_title = video_data['title']
- live_event = video_data.get('live_event') or {}
- is_live = live_event.get('status') == 'started'
-
- formats = []
- config_files = video_data.get('files') or config['request'].get('files', {})
- for f in config_files.get('progressive', []):
- video_url = f.get('url')
- if not video_url:
- continue
- formats.append({
- 'url': video_url,
- 'format_id': 'http-%s' % f.get('quality'),
- 'width': int_or_none(f.get('width')),
- 'height': int_or_none(f.get('height')),
- 'fps': int_or_none(f.get('fps')),
- 'tbr': int_or_none(f.get('bitrate')),
- })
-
- # TODO: fix handling of 308 status code returned for live archive manifest requests
- for files_type in ('hls', 'dash'):
- for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
- manifest_url = cdn_data.get('url')
- if not manifest_url:
- continue
- format_id = '%s-%s' % (files_type, cdn_name)
- if files_type == 'hls':
- formats.extend(self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4',
- 'm3u8' if is_live else 'm3u8_native', m3u8_id=format_id,
- note='Downloading %s m3u8 information' % cdn_name,
- fatal=False))
- elif files_type == 'dash':
- mpd_pattern = r'/%s/(?:sep/)?video/' % video_id
- mpd_manifest_urls = []
- if re.search(mpd_pattern, manifest_url):
- for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
- mpd_manifest_urls.append((format_id + suffix, re.sub(
- mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url)))
- else:
- mpd_manifest_urls = [(format_id, manifest_url)]
- for f_id, m_url in mpd_manifest_urls:
- if 'json=1' in m_url:
- real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
- if real_m_url:
- m_url = real_m_url
- mpd_formats = self._extract_mpd_formats(
- m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
- 'Downloading %s MPD information' % cdn_name,
- fatal=False)
- for f in mpd_formats:
- if f.get('vcodec') == 'none':
- f['preference'] = -50
- elif f.get('acodec') == 'none':
- f['preference'] = -40
- formats.extend(mpd_formats)
-
- live_archive = live_event.get('archive') or {}
- live_archive_source_url = live_archive.get('source_url')
- if live_archive_source_url and live_archive.get('status') == 'done':
- formats.append({
- 'format_id': 'live-archive-source',
- 'url': live_archive_source_url,
- 'preference': 1,
- })
-
- subtitles = {}
- text_tracks = config['request'].get('text_tracks')
- if text_tracks:
- for tt in text_tracks:
- subtitles[tt['lang']] = [{
- 'ext': 'vtt',
- 'url': 'https://vimeo.com' + tt['url'],
- }]
-
- thumbnails = []
- if not is_live:
- for key, thumb in video_data.get('thumbs', {}).items():
- thumbnails.append({
- 'id': key,
- 'width': int_or_none(key),
- 'url': thumb,
- })
- thumbnail = video_data.get('thumbnail')
- if thumbnail:
- thumbnails.append({
- 'url': thumbnail,
- })
-
- owner = video_data.get('owner') or {}
- video_uploader_url = owner.get('url')
-
- return {
- 'id': video_id,
- 'title': self._live_title(video_title) if is_live else video_title,
- 'uploader': owner.get('name'),
- 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
- 'uploader_url': video_uploader_url,
- 'thumbnails': thumbnails,
- 'duration': int_or_none(video_data.get('duration')),
- 'formats': formats,
- 'subtitles': subtitles,
- 'is_live': is_live,
- }
-
- def _extract_original_format(self, url, video_id):
- download_data = self._download_json(
- url, video_id, fatal=False,
- query={'action': 'load_download_config'},
- headers={'X-Requested-With': 'XMLHttpRequest'})
- if download_data:
- source_file = download_data.get('source_file')
- if isinstance(source_file, dict):
- download_url = source_file.get('download_url')
- if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
- source_name = source_file.get('public_name', 'Original')
- if self._is_valid_url(download_url, video_id, '%s video' % source_name):
- ext = (try_get(
- source_file, lambda x: x['extension'],
- compat_str) or determine_ext(
- download_url, None) or 'mp4').lower()
- return {
- 'url': download_url,
- 'ext': ext,
- 'width': int_or_none(source_file.get('width')),
- 'height': int_or_none(source_file.get('height')),
- 'filesize': parse_filesize(source_file.get('size')),
- 'format_id': source_name,
- 'preference': 1,
- }
-
-
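# A sketch of the DASH manifest handling in _parse_config above: Vimeo's CDN
# hands out ".../master.json" URLs; for URLs under /<id>/(sep/)?video/ the
# extractor tries both the muxed and the "sep" (separate audio/video) trees,
# then swaps in master.mpd. The URL and CDN name below are invented.
import re

video_id = '56015672'
manifest_url = 'https://skyfire.vimeocdn.com/exp=1/%s/sep/video/master.json?base64_init=1' % video_id

mpd_pattern = r'/%s/(?:sep/)?video/' % video_id
mpd_manifest_urls = []
if re.search(mpd_pattern, manifest_url):
    for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
        mpd_manifest_urls.append(('dash-fastly' + suffix, re.sub(
            mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url)))
for f_id, m_url in mpd_manifest_urls:
    print(f_id, m_url.replace('/master.json', '/master.mpd'))
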
-class VimeoIE(VimeoBaseInfoExtractor):
- """Information extractor for vimeo.com."""
-
- # _VALID_URL matches Vimeo URLs
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:
- www|
- (?P<player>player)
- )
- \.
- )?
- vimeo(?P<pro>pro)?\.com/
- (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
- (?:.*?/)?
- (?:
- (?:
- play_redirect_hls|
- moogaloop\.swf)\?clip_id=
- )?
- (?:videos?/)?
- (?P<id>[0-9]+)
- (?:/[\da-f]+)?
- /?(?:[?&].*)?(?:[#].*)?$
- '''
- IE_NAME = 'vimeo'
- _TESTS = [
- {
- 'url': 'http://vimeo.com/56015672#at=0',
- 'md5': '8879b6cc097e987f02484baf890129e5',
- 'info_dict': {
- 'id': '56015672',
- 'ext': 'mp4',
- 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
- 'description': 'md5:509a9ad5c9bf97c60faee9203aca4479',
- 'timestamp': 1355990239,
- 'upload_date': '20121220',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
- 'uploader_id': 'user7108434',
- 'uploader': 'Filippo Valsorda',
- 'duration': 10,
- 'license': 'by-sa',
- },
- },
- {
- 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
- 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
- 'note': 'Vimeo Pro video (#1197)',
- 'info_dict': {
- 'id': '68093876',
- 'ext': 'mp4',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
- 'uploader_id': 'openstreetmapus',
- 'uploader': 'OpenStreetMap US',
- 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
- 'description': 'md5:fd69a7b8d8c34a4e1d2ec2e4afd6ec30',
- 'duration': 1595,
- },
- },
- {
- 'url': 'http://player.vimeo.com/video/54469442',
- 'md5': '619b811a4417aa4abe78dc653becf511',
- 'note': 'Videos that embed the url in the player page',
- 'info_dict': {
- 'id': '54469442',
- 'ext': 'mp4',
- 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
- 'uploader': 'The BLN & Business of Software',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
- 'uploader_id': 'theblnbusinessofsoftware',
- 'duration': 3610,
- 'description': None,
- },
- },
- {
- 'url': 'http://vimeo.com/68375962',
- 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
- 'note': 'Video protected with password',
- 'info_dict': {
- 'id': '68375962',
- 'ext': 'mp4',
- 'title': 'youtube-dl password protected test video',
- 'timestamp': 1371200155,
- 'upload_date': '20130614',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
- 'uploader_id': 'user18948128',
- 'uploader': 'Jaime Marquínez Ferrándiz',
- 'duration': 10,
- 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
- },
- 'params': {
- 'videopassword': 'youtube-dl',
- },
- },
- {
- 'url': 'http://vimeo.com/channels/keypeele/75629013',
- 'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
- 'info_dict': {
- 'id': '75629013',
- 'ext': 'mp4',
- 'title': 'Key & Peele: Terrorist Interrogation',
- 'description': 'md5:8678b246399b070816b12313e8b4eb5c',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
- 'uploader_id': 'atencio',
- 'uploader': 'Peter Atencio',
- 'channel_id': 'keypeele',
- 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
- 'timestamp': 1380339469,
- 'upload_date': '20130928',
- 'duration': 187,
- },
- 'expected_warnings': ['Unable to download JSON metadata'],
- },
- {
- 'url': 'http://vimeo.com/76979871',
- 'note': 'Video with subtitles',
- 'info_dict': {
- 'id': '76979871',
- 'ext': 'mp4',
- 'title': 'The New Vimeo Player (You Know, For Videos)',
- 'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
- 'timestamp': 1381846109,
- 'upload_date': '20131015',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
- 'uploader_id': 'staff',
- 'uploader': 'Vimeo Staff',
- 'duration': 62,
- }
- },
- {
- # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
- 'url': 'https://player.vimeo.com/video/98044508',
- 'note': 'The js code contains assignments to the same variable as the config',
- 'info_dict': {
- 'id': '98044508',
- 'ext': 'mp4',
- 'title': 'Pier Solar OUYA Official Trailer',
- 'uploader': 'Tulio Gonçalves',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
- 'uploader_id': 'user28849593',
- },
- },
- {
- # contains original format
- 'url': 'https://vimeo.com/33951933',
- 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
- 'info_dict': {
- 'id': '33951933',
- 'ext': 'mp4',
- 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
- 'uploader': 'The DMCI',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
- 'uploader_id': 'dmci',
- 'timestamp': 1324343742,
- 'upload_date': '20111220',
- 'description': 'md5:ae23671e82d05415868f7ad1aec21147',
- },
- },
- {
- # only available via https://vimeo.com/channels/tributes/6213729 and
- # not via https://vimeo.com/6213729
- 'url': 'https://vimeo.com/channels/tributes/6213729',
- 'info_dict': {
- 'id': '6213729',
- 'ext': 'mp4',
- 'title': 'Vimeo Tribute: The Shining',
- 'uploader': 'Casey Donahue',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
- 'uploader_id': 'caseydonahue',
- 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
- 'channel_id': 'tributes',
- 'timestamp': 1250886430,
- 'upload_date': '20090821',
- 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': ['Unable to download JSON metadata'],
- },
- {
- # redirects to ondemand extractor and should be passed through it
- # for successful extraction
- 'url': 'https://vimeo.com/73445910',
- 'info_dict': {
- 'id': '73445910',
- 'ext': 'mp4',
- 'title': 'The Reluctant Revolutionary',
- 'uploader': '10Ft Films',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
- 'uploader_id': 'tenfootfilms',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'http://player.vimeo.com/video/68375962',
- 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
- 'info_dict': {
- 'id': '68375962',
- 'ext': 'mp4',
- 'title': 'youtube-dl password protected test video',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
- 'uploader_id': 'user18948128',
- 'uploader': 'Jaime Marquínez Ferrándiz',
- 'duration': 10,
- },
- 'params': {
- 'videopassword': 'youtube-dl',
- },
- },
- {
- 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
- 'only_matching': True,
- },
- {
- 'url': 'https://vimeo.com/109815029',
- 'note': 'Video not completely processed, "failed" seed status',
- 'only_matching': True,
- },
- {
- 'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
- 'only_matching': True,
- },
- {
- 'url': 'https://vimeo.com/album/2632481/video/79010983',
- 'only_matching': True,
- },
- {
- # source file returns 403: Forbidden
- 'url': 'https://vimeo.com/7809605',
- 'only_matching': True,
- },
- {
- 'url': 'https://vimeo.com/160743502/abd0e13fb4',
- 'only_matching': True,
- }
- # https://gettingthingsdone.com/workflowmap/
- # vimeo embed with check-password page protected by Referer header
- ]
-
- @staticmethod
- def _smuggle_referrer(url, referrer_url):
- return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
-
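# A sketch of the referrer smuggling used just above, assuming youtube-dl's
# utils are importable: the embedding page URL rides along inside the
# smuggled URL and is recovered via unsmuggle_url in _real_extract.
from youtube_dl.utils import smuggle_url, unsmuggle_url

smuggled = smuggle_url('https://player.vimeo.com/video/98044508',
                       {'http_headers': {'Referer': 'https://example.com/page'}})
url, data = unsmuggle_url(smuggled, {})
print(url, data['http_headers']['Referer'])
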
- @staticmethod
- def _extract_urls(url, webpage):
- urls = []
- # Look for embedded (iframe) Vimeo player
- for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
- webpage):
- urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
- PLAIN_EMBED_RE = (
- # Look for embedded (swf embed) Vimeo player
- r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
- # Look more for non-standard embedded Vimeo player
- r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
- )
- for embed_re in PLAIN_EMBED_RE:
- for mobj in re.finditer(embed_re, webpage):
- urls.append(mobj.group('url'))
- return urls
-
- @staticmethod
- def _extract_url(url, webpage):
- urls = VimeoIE._extract_urls(url, webpage)
- return urls[0] if urls else None
-
- def _verify_player_video_password(self, url, video_id, headers):
- password = self._downloader.params.get('videopassword')
- if password is None:
- raise ExtractorError('This video is protected by a password, use the --video-password option')
- data = urlencode_postdata({
- 'password': base64.b64encode(password.encode()),
- })
- headers = merge_dicts(headers, {
- 'Content-Type': 'application/x-www-form-urlencoded',
- })
- checked = self._download_json(
- url + '/check-password', video_id,
- 'Verifying the password', data=data, headers=headers)
- if checked is False:
- raise ExtractorError('Wrong video password', expected=True)
- return checked
-
- def _real_initialize(self):
- self._login()
-
- def _real_extract(self, url):
- url, data = unsmuggle_url(url, {})
- headers = std_headers.copy()
- if 'http_headers' in data:
- headers.update(data['http_headers'])
- if 'Referer' not in headers:
- headers['Referer'] = url
-
- channel_id = self._search_regex(
- r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
-
- # Extract ID from URL
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- orig_url = url
- if mobj.group('pro'):
- # some videos require portfolio_id to be present in player url
- # https://github.com/ytdl-org/youtube-dl/issues/20070
- url = self._extract_url(url, self._download_webpage(url, video_id))
- elif mobj.group('player'):
- url = 'https://player.vimeo.com/video/' + video_id
- elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
- url = 'https://vimeo.com/' + video_id
-
- # Retrieve video webpage to extract further information
- request = sanitized_Request(url, headers=headers)
- try:
- webpage, urlh = self._download_webpage_handle(request, video_id)
- redirect_url = compat_str(urlh.geturl())
- # Some URLs that redirect to ondemand can't be extracted with
- # this extractor right away and thus should be passed through the
- # ondemand extractor (e.g. https://vimeo.com/73445910)
- if VimeoOndemandIE.suitable(redirect_url):
- return self.url_result(redirect_url, VimeoOndemandIE.ie_key())
- except ExtractorError as ee:
- if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
- errmsg = ee.cause.read()
- if b'Because of its privacy settings, this video cannot be played here' in errmsg:
- raise ExtractorError(
- 'Cannot download embed-only video without embedding '
- 'URL. Please call youtube-dl with the URL of the page '
- 'that embeds this video.',
- expected=True)
- raise
-
- # Now we begin extracting as much information as we can from what we
- # retrieved. First we extract the information common to all extractors,
- # and later we extract those that are Vimeo specific.
- self.report_extraction(video_id)
-
- vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
- if vimeo_config:
- seed_status = vimeo_config.get('seed_status', {})
- if seed_status.get('state') == 'failed':
- raise ExtractorError(
- '%s said: %s' % (self.IE_NAME, seed_status['title']),
- expected=True)
-
- cc_license = None
- timestamp = None
-
- # Extract the config JSON
- try:
- try:
- config_url = self._html_search_regex(
- r' data-config-url="(.+?)"', webpage,
- 'config URL', default=None)
- if not config_url:
- # Sometimes a new react-based page is served instead of the old one,
- # which requires a different config URL extraction approach (see
- # https://github.com/ytdl-org/youtube-dl/pull/7209)
- vimeo_clip_page_config = self._search_regex(
- r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
- 'vimeo clip page config')
- page_config = self._parse_json(vimeo_clip_page_config, video_id)
- config_url = page_config['player']['config_url']
- cc_license = page_config.get('cc_license')
- timestamp = try_get(
- page_config, lambda x: x['clip']['uploaded_on'],
- compat_str)
- config_json = self._download_webpage(config_url, video_id)
- config = json.loads(config_json)
- except RegexNotFoundError:
- # For pro videos or player.vimeo.com URLs we try to find out
- # which variable the config dict is assigned to
- m_variable_name = re.search(r'(\w)\.video\.id', webpage)
- if m_variable_name is not None:
- config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
- else:
- config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
- config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
- config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
- config = self._search_regex(config_re, webpage, 'info section',
- flags=re.DOTALL)
- config = json.loads(config)
- except Exception as e:
- if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
- raise ExtractorError('The author has restricted access to this video, try the "--referer" option')
-
- if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
- if '_video_password_verified' in data:
- raise ExtractorError('Video password verification failed!')
- self._verify_video_password(redirect_url, video_id, webpage)
- return self._real_extract(
- smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
- else:
- raise ExtractorError('Unable to extract info section',
- cause=e)
- else:
- if config.get('view') == 4:
- config = self._verify_player_video_password(redirect_url, video_id, headers)
-
- vod = config.get('video', {}).get('vod', {})
-
- def is_rented():
- if '>You rented this title.<' in webpage:
- return True
- if config.get('user', {}).get('purchased'):
- return True
- for purchase_option in vod.get('purchase_options', []):
- if purchase_option.get('purchased'):
- return True
- label = purchase_option.get('label_string')
- if label and (label.startswith('You rented this') or label.endswith(' remaining')):
- return True
- return False
-
- if is_rented() and vod.get('is_trailer'):
- feature_id = vod.get('feature_id')
- if feature_id and not data.get('force_feature_id', False):
- return self.url_result(smuggle_url(
- 'https://player.vimeo.com/player/%s' % feature_id,
- {'force_feature_id': True}), 'Vimeo')
-
- # Extract video description
-
- video_description = self._html_search_regex(
- r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
- webpage, 'description', default=None)
- if not video_description:
- video_description = self._html_search_meta(
- 'description', webpage, default=None)
- if not video_description and mobj.group('pro'):
- orig_webpage = self._download_webpage(
- orig_url, video_id,
- note='Downloading webpage for description',
- fatal=False)
- if orig_webpage:
- video_description = self._html_search_meta(
- 'description', orig_webpage, default=None)
- if not video_description and not mobj.group('player'):
- self._downloader.report_warning('Cannot find video description')
-
- # Extract upload date
- if not timestamp:
- timestamp = self._search_regex(
- r'<time[^>]+datetime="([^"]+)"', webpage,
- 'timestamp', default=None)
-
- try:
- view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
- like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
- comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
- except RegexNotFoundError:
- # This info is only available in vimeo.com/{id} URLs
- view_count = None
- like_count = None
- comment_count = None
-
- formats = []
-
- source_format = self._extract_original_format(
- 'https://vimeo.com/' + video_id, video_id)
- if source_format:
- formats.append(source_format)
-
- info_dict_config = self._parse_config(config, video_id)
- formats.extend(info_dict_config['formats'])
- self._vimeo_sort_formats(formats)
-
- json_ld = self._search_json_ld(webpage, video_id, default={})
-
- if not cc_license:
- cc_license = self._search_regex(
- r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
- webpage, 'license', default=None, group='license')
-
- channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
-
- info_dict = {
- 'formats': formats,
- 'timestamp': unified_timestamp(timestamp),
- 'description': video_description,
- 'webpage_url': url,
- 'view_count': view_count,
- 'like_count': like_count,
- 'comment_count': comment_count,
- 'license': cc_license,
- 'channel_id': channel_id,
- 'channel_url': channel_url,
- }
-
- info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
-
- return info_dict
-
-
-class VimeoOndemandIE(VimeoBaseInfoExtractor):
- IE_NAME = 'vimeo:ondemand'
- _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/(?P<id>[^/?#&]+)'
- _TESTS = [{
- # ondemand video not available via https://vimeo.com/id
- 'url': 'https://vimeo.com/ondemand/20704',
- 'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
- 'info_dict': {
- 'id': '105442900',
- 'ext': 'mp4',
- 'title': 'המעבדה - במאי יותם פלדמן',
- 'uploader': 'גם סרטים',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
- 'uploader_id': 'gumfilms',
- },
- 'params': {
- 'format': 'best[protocol=https]',
- },
- }, {
- # requires Referer to be passed along with og:video:url
- 'url': 'https://vimeo.com/ondemand/36938/126682985',
- 'info_dict': {
- 'id': '126682985',
- 'ext': 'mp4',
- 'title': 'Rävlock, rätt läte på rätt plats',
- 'uploader': 'Lindroth & Norin',
- 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user14430847',
- 'uploader_id': 'user14430847',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://vimeo.com/ondemand/nazmaalik',
- 'only_matching': True,
- }, {
- 'url': 'https://vimeo.com/ondemand/141692381',
- 'only_matching': True,
- }, {
- 'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- return self.url_result(
- # Some videos require Referer to be passed along with og:video:url
- # similarly to generic vimeo embeds (e.g.
- # https://vimeo.com/ondemand/36938/126682985).
- VimeoIE._smuggle_referrer(self._og_search_video_url(webpage), url),
- VimeoIE.ie_key())
-
-
-class VimeoChannelIE(VimeoBaseInfoExtractor):
- IE_NAME = 'vimeo:channel'
- _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
- _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
- _TITLE = None
- _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
- _TESTS = [{
- 'url': 'https://vimeo.com/channels/tributes',
- 'info_dict': {
- 'id': 'tributes',
- 'title': 'Vimeo Tributes',
- },
- 'playlist_mincount': 25,
- }]
-
- def _page_url(self, base_url, pagenum):
- return '%s/videos/page:%d/' % (base_url, pagenum)
-
- def _extract_list_title(self, webpage):
- return self._TITLE or self._html_search_regex(
- self._TITLE_RE, webpage, 'list title', fatal=False)
-
- def _login_list_password(self, page_url, list_id, webpage):
- login_form = self._search_regex(
- r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
- webpage, 'login form', default=None)
- if not login_form:
- return webpage
-
- password = self._downloader.params.get('videopassword')
- if password is None:
- raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)
- fields = self._hidden_inputs(login_form)
- token, vuid = self._extract_xsrft_and_vuid(webpage)
- fields['token'] = token
- fields['password'] = password
- post = urlencode_postdata(fields)
- password_path = self._search_regex(
- r'action="([^"]+)"', login_form, 'password URL')
- password_url = compat_urlparse.urljoin(page_url, password_path)
- password_request = sanitized_Request(password_url, post)
- password_request.add_header('Content-type', 'application/x-www-form-urlencoded')
- self._set_vimeo_cookie('vuid', vuid)
- self._set_vimeo_cookie('xsrft', token)
-
- return self._download_webpage(
- password_request, list_id,
- 'Verifying the password', 'Wrong password')
-
- def _title_and_entries(self, list_id, base_url):
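- # The first item yielded is the playlist title; all subsequent items are video entries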
- for pagenum in itertools.count(1):
- page_url = self._page_url(base_url, pagenum)
- webpage = self._download_webpage(
- page_url, list_id,
- 'Downloading page %s' % pagenum)
-
- if pagenum == 1:
- webpage = self._login_list_password(page_url, list_id, webpage)
- yield self._extract_list_title(webpage)
-
- # Try extracting href first since not all videos are available via
- # a short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
- clips = re.findall(
- r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
- if clips:
- for video_id, video_url, video_title in clips:
- yield self.url_result(
- compat_urlparse.urljoin(base_url, video_url),
- VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
- # More relaxed fallback
- else:
- for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
- yield self.url_result(
- 'https://vimeo.com/%s' % video_id,
- VimeoIE.ie_key(), video_id=video_id)
-
- if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
- break
-
- def _extract_videos(self, list_id, base_url):
- title_and_entries = self._title_and_entries(list_id, base_url)
- list_title = next(title_and_entries)
- return self.playlist_result(title_and_entries, list_id, list_title)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel_id = mobj.group('id')
- return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
-
-
-class VimeoUserIE(VimeoChannelIE):
- IE_NAME = 'vimeo:user'
- _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
- _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
- _TESTS = [{
- 'url': 'https://vimeo.com/nkistudio/videos',
- 'info_dict': {
- 'title': 'Nki',
- 'id': 'nkistudio',
- },
- 'playlist_mincount': 66,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
- return self._extract_videos(name, 'https://vimeo.com/%s' % name)
-
-
-class VimeoAlbumIE(VimeoChannelIE):
- IE_NAME = 'vimeo:album'
- _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
- _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
- _TESTS = [{
- 'url': 'https://vimeo.com/album/2632481',
- 'info_dict': {
- 'id': '2632481',
- 'title': 'Staff Favorites: November 2013',
- },
- 'playlist_mincount': 13,
- }, {
- 'note': 'Password-protected album',
- 'url': 'https://vimeo.com/album/3253534',
- 'info_dict': {
- 'title': 'test',
- 'id': '3253534',
- },
- 'playlist_count': 1,
- 'params': {
- 'videopassword': 'youtube-dl',
- }
- }]
- _PAGE_SIZE = 100
-
- def _fetch_page(self, album_id, authorization, hashed_pass, page):
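- # OnDemandPagedList pages are 0-based, while the Vimeo API expects 1-based page numbers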
- api_page = page + 1
- query = {
- 'fields': 'link,uri',
- 'page': api_page,
- 'per_page': self._PAGE_SIZE,
- }
- if hashed_pass:
- query['_hashed_pass'] = hashed_pass
- videos = self._download_json(
- 'https://api.vimeo.com/albums/%s/videos' % album_id,
- album_id, 'Downloading page %d' % api_page, query=query, headers={
- 'Authorization': 'jwt ' + authorization,
- })['data']
- for video in videos:
- link = video.get('link')
- if not link:
- continue
- uri = video.get('uri')
- video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None
- yield self.url_result(link, VimeoIE.ie_key(), video_id)
-
- def _real_extract(self, url):
- album_id = self._match_id(url)
- webpage = self._download_webpage(url, album_id)
- webpage = self._login_list_password(url, album_id, webpage)
- api_config = self._extract_vimeo_config(webpage, album_id)['api']
- entries = OnDemandPagedList(functools.partial(
- self._fetch_page, album_id, api_config['jwt'],
- api_config.get('hashed_pass')), self._PAGE_SIZE)
- return self.playlist_result(entries, album_id, self._html_search_regex(
- r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False))
-
-
-class VimeoGroupsIE(VimeoAlbumIE):
- IE_NAME = 'vimeo:group'
- _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)'
- _TESTS = [{
- 'url': 'https://vimeo.com/groups/rolexawards',
- 'info_dict': {
- 'id': 'rolexawards',
- 'title': 'Rolex Awards for Enterprise',
- },
- 'playlist_mincount': 73,
- }]
-
- def _extract_list_title(self, webpage):
- return self._og_search_title(webpage, fatal=False)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- name = mobj.group('name')
- return self._extract_videos(name, 'https://vimeo.com/groups/%s' % name)
-
-
-class VimeoReviewIE(VimeoBaseInfoExtractor):
- IE_NAME = 'vimeo:review'
- IE_DESC = 'Review pages on Vimeo'
- _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
- _TESTS = [{
- 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
- 'md5': 'c507a72f780cacc12b2248bb4006d253',
- 'info_dict': {
- 'id': '75524534',
- 'ext': 'mp4',
- 'title': "DICK HARDWICK 'Comedian'",
- 'uploader': 'Richard Hardwick',
- 'uploader_id': 'user21297594',
- }
- }, {
- 'note': 'video player needs Referer',
- 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
- 'md5': '6295fdab8f4bf6a002d058b2c6dce276',
- 'info_dict': {
- 'id': '91613211',
- 'ext': 'mp4',
- 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
- 'uploader': 'DevWeek Events',
- 'duration': 2773,
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader_id': 'user22258446',
- }
- }, {
- 'note': 'Password protected',
- 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
- 'info_dict': {
- 'id': '138823582',
- 'ext': 'mp4',
- 'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
- 'uploader': 'TMB',
- 'uploader_id': 'user37284429',
- },
- 'params': {
- 'videopassword': 'holygrail',
- },
- 'skip': 'video gone',
- }]
-
- def _real_initialize(self):
- self._login()
-
- def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
- webpage = self._download_webpage(webpage_url, video_id)
- config_url = self._html_search_regex(
- r'data-config-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
- 'config URL', default=None, group='url')
- if not config_url:
- data = self._parse_json(self._search_regex(
- r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
- default=NO_DEFAULT if video_password_verified else '{}'), video_id)
- config = data.get('vimeo_esi', {}).get('config', {})
- config_url = config.get('configUrl') or try_get(config, lambda x: x['clipData']['configUrl'])
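- # If no config URL was found, verify the video password and retry;
- # the retry is fatal if extraction fails again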
- if config_url is None:
- self._verify_video_password(webpage_url, video_id, webpage)
- config_url = self._get_config_url(
- webpage_url, video_id, video_password_verified=True)
- return config_url
-
- def _real_extract(self, url):
- page_url, video_id = re.match(self._VALID_URL, url).groups()
- config_url = self._get_config_url(url, video_id)
- config = self._download_json(config_url, video_id)
- info_dict = self._parse_config(config, video_id)
- source_format = self._extract_original_format(page_url, video_id)
- if source_format:
- info_dict['formats'].append(source_format)
- self._vimeo_sort_formats(info_dict['formats'])
- return info_dict
-
-
-class VimeoWatchLaterIE(VimeoChannelIE):
- IE_NAME = 'vimeo:watchlater'
- IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
- _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
- _TITLE = 'Watch Later'
- _LOGIN_REQUIRED = True
- _TESTS = [{
- 'url': 'https://vimeo.com/watchlater',
- 'only_matching': True,
- }]
-
- def _real_initialize(self):
- self._login()
-
- def _page_url(self, base_url, pagenum):
- url = '%s/page:%d/' % (base_url, pagenum)
- request = sanitized_Request(url)
- # Set the header to get a partial HTML page with the ids;
- # the normal page doesn't contain them.
- request.add_header('X-Requested-With', 'XMLHttpRequest')
- return request
-
- def _real_extract(self, url):
- return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
-
-
-class VimeoLikesIE(VimeoChannelIE):
- _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
- IE_NAME = 'vimeo:likes'
- IE_DESC = 'Vimeo user likes'
- _TESTS = [{
- 'url': 'https://vimeo.com/user755559/likes/',
- 'playlist_mincount': 293,
- 'info_dict': {
- 'id': 'user755559',
- 'title': 'urza’s Likes',
- },
- }, {
- 'url': 'https://vimeo.com/stormlapse/likes',
- 'only_matching': True,
- }]
-
- def _page_url(self, base_url, pagenum):
- return '%s/page:%d/' % (base_url, pagenum)
-
- def _real_extract(self, url):
- user_id = self._match_id(url)
- return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
-
-
-class VHXEmbedIE(VimeoBaseInfoExtractor):
- IE_NAME = 'vhx:embed'
- _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
- config_url = self._parse_json(self._search_regex(
- r'window\.OTTData\s*=\s*({.+})', webpage,
- 'ott data'), video_id, js_to_json)['config_url']
- config = self._download_json(config_url, video_id)
- info = self._parse_config(config, video_id)
- self._vimeo_sort_formats(info['formats'])
- return info
diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py
deleted file mode 100644
index 8b6dc0e24..000000000
--- a/youtube_dl/extractor/vk.py
+++ /dev/null
@@ -1,644 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import collections
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_urlparse
-from ..utils import (
- clean_html,
- ExtractorError,
- get_element_by_class,
- int_or_none,
- orderedSet,
- remove_start,
- str_or_none,
- str_to_int,
- unescapeHTML,
- unified_timestamp,
- url_or_none,
- urlencode_postdata,
-)
-from .dailymotion import DailymotionIE
-from .pladform import PladformIE
-from .vimeo import VimeoIE
-from .youtube import YoutubeIE
-
-
-class VKBaseIE(InfoExtractor):
- _NETRC_MACHINE = 'vk'
-
- def _login(self):
- username, password = self._get_login_info()
- if username is None:
- return
-
- login_page, url_handle = self._download_webpage_handle(
- 'https://vk.com', None, 'Downloading login page')
-
- login_form = self._hidden_inputs(login_page)
-
- login_form.update({
- 'email': username.encode('cp1251'),
- 'pass': password.encode('cp1251'),
- })
-
- # vk serves two identical remixlhk cookies in the Set-Cookie header
- # and expects the first one to actually be set
- self._apply_first_set_cookie_header(url_handle, 'remixlhk')
-
- login_page = self._download_webpage(
- 'https://login.vk.com/?act=login', None,
- note='Logging in',
- data=urlencode_postdata(login_form))
-
- if re.search(r'onLoginFailed', login_page):
- raise ExtractorError(
- 'Unable to login, incorrect username and/or password', expected=True)
-
- def _real_initialize(self):
- self._login()
-
-
-class VKIE(VKBaseIE):
- IE_NAME = 'vk'
- IE_DESC = 'VK'
- _VALID_URL = r'''(?x)
- https?://
- (?:
- (?:
- (?:(?:m|new)\.)?vk\.com/video_|
- (?:www\.)?daxab\.com/
- )
- ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
- (?:
- (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
- (?:www\.)?daxab\.com/embed/
- )
- (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
- )
- '''
- _TESTS = [
- {
- 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
- 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
- 'info_dict': {
- 'id': '-77521_162222515',
- 'ext': 'mp4',
- 'title': 'ProtivoGunz - Хуёвая песня',
- 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
- 'uploader_id': '-77521',
- 'duration': 195,
- 'timestamp': 1329049880,
- 'upload_date': '20120212',
- },
- },
- {
- 'url': 'http://vk.com/video205387401_165548505',
- 'md5': '6c0aeb2e90396ba97035b9cbde548700',
- 'info_dict': {
- 'id': '205387401_165548505',
- 'ext': 'mp4',
- 'title': 'No name',
- 'uploader': 'Tom Cruise',
- 'uploader_id': '205387401',
- 'duration': 9,
- 'timestamp': 1374364108,
- 'upload_date': '20130720',
- }
- },
- {
- 'note': 'Embedded video',
- 'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1',
- 'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a',
- 'info_dict': {
- 'id': '32194266_162925554',
- 'ext': 'mp4',
- 'uploader': 'Vladimir Gavrin',
- 'title': 'Lin Dan',
- 'duration': 101,
- 'upload_date': '20120730',
- 'view_count': int,
- },
- 'skip': 'This video has been removed from public access.',
- },
- {
- # VIDEO NOW REMOVED
- # Please update if you find a video whose URL follows the same pattern
- 'url': 'http://vk.com/video-8871596_164049491',
- 'md5': 'a590bcaf3d543576c9bd162812387666',
- 'note': 'Only available for registered users',
- 'info_dict': {
- 'id': '-8871596_164049491',
- 'ext': 'mp4',
- 'uploader': 'Триллеры',
- 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
- 'duration': 8352,
- 'upload_date': '20121218',
- 'view_count': int,
- },
- 'skip': 'Requires vk account credentials',
- },
- {
- 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
- 'md5': '4d7a5ef8cf114dfa09577e57b2993202',
- 'info_dict': {
- 'id': '-43215063_168067957',
- 'ext': 'mp4',
- 'uploader': 'Киномания - лучшее из мира кино',
- 'title': ' ',
- 'duration': 7291,
- 'upload_date': '20140328',
- },
- 'skip': 'Requires vk account credentials',
- },
- {
- 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
- 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
- 'note': 'ivi.ru embed',
- 'info_dict': {
- 'id': '-43215063_169084319',
- 'ext': 'mp4',
- 'title': 'Книга Илая',
- 'duration': 6771,
- 'upload_date': '20140626',
- 'view_count': int,
- },
- 'skip': 'Only works from Russia',
- },
- {
- # video (removed?) only available with list id
- 'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
- 'md5': '091287af5402239a1051c37ec7b92913',
- 'info_dict': {
- 'id': '30481095_171201961',
- 'ext': 'mp4',
- 'title': 'ТюменцевВВ_09.07.2015',
- 'uploader': 'Anton Ivanov',
- 'duration': 109,
- 'upload_date': '20150709',
- 'view_count': int,
- },
- 'skip': 'Removed',
- },
- {
- # youtube embed
- 'url': 'https://vk.com/video276849682_170681728',
- 'info_dict': {
- 'id': 'V3K4mi0SYkc',
- 'ext': 'mp4',
- 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
- 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
- 'duration': 178,
- 'upload_date': '20130116',
- 'uploader': "Children's Joy Foundation Inc.",
- 'uploader_id': 'thecjf',
- 'view_count': int,
- },
- },
- {
- # dailymotion embed
- 'url': 'https://vk.com/video-37468416_456239855',
- 'info_dict': {
- 'id': 'k3lz2cmXyRuJQSjGHUv',
- 'ext': 'mp4',
- 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
- # TODO: fix test by fixing dailymotion description extraction
- 'description': 'md5:c651358f03c56f1150b555c26d90a0fd',
- 'uploader': 'AniLibria.Tv',
- 'upload_date': '20160914',
- 'uploader_id': 'x1p5vl5',
- 'timestamp': 1473877246,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # video key is extra_data not url\d+
- 'url': 'http://vk.com/video-110305615_171782105',
- 'md5': 'e13fcda136f99764872e739d13fac1d1',
- 'info_dict': {
- 'id': '-110305615_171782105',
- 'ext': 'mp4',
- 'title': 'S-Dance, репетиции к The way show',
- 'uploader': 'THE WAY SHOW | 17 апреля',
- 'uploader_id': '-110305615',
- 'timestamp': 1454859345,
- 'upload_date': '20160207',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # finished live stream, postlive_mp4
- 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
- 'info_dict': {
- 'id': '-387766_456242764',
- 'ext': 'mp4',
- 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
- 'uploader': 'Игромания',
- 'duration': 5239,
- # TODO: use act=show to extract view_count
- # 'view_count': int,
- 'upload_date': '20160929',
- 'uploader_id': '-387766',
- 'timestamp': 1475137527,
- },
- },
- {
- # live stream, hls and rtmp links, most likely already finished live
- # stream by the time you are reading this comment
- 'url': 'https://vk.com/video-140332_456239111',
- 'only_matching': True,
- },
- {
- # removed video, just testing that we match the pattern
- 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
- 'only_matching': True,
- },
- {
- # age restricted video, requires vk account credentials
- 'url': 'https://vk.com/video205387401_164765225',
- 'only_matching': True,
- },
- {
- # pladform embed
- 'url': 'https://vk.com/video-76116461_171554880',
- 'only_matching': True,
- },
- {
- 'url': 'http://new.vk.com/video205387401_165548505',
- 'only_matching': True,
- },
- {
- # This video is no longer available, because its author has been blocked.
- 'url': 'https://vk.com/video-10639516_456240611',
- 'only_matching': True,
- },
- {
- # The video is not available in your region.
- 'url': 'https://vk.com/video-51812607_171445436',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('videoid')
-
- if video_id:
- info_url = 'https://vk.com/al_video.php?act=show_inline&al=1&video=' + video_id
- # Some videos (removed?) can only be downloaded with list id specified
- list_id = mobj.group('list_id')
- if list_id:
- info_url += '&list=%s' % list_id
- else:
- info_url = 'http://vk.com/video_ext.php?' + mobj.group('embed_query')
- video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
-
- info_page = self._download_webpage(info_url, video_id)
-
- error_message = self._html_search_regex(
- [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
- r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
- info_page, 'error message', default=None)
- if error_message:
- raise ExtractorError(error_message, expected=True)
-
- if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
- raise ExtractorError(
- 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
- expected=True)
-
- ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
-
- ERRORS = {
- r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
- ERROR_COPYRIGHT,
-
- r'>The video .*? was removed from public access by request of the copyright holder.<':
- ERROR_COPYRIGHT,
-
- r'<!>Please log in or <':
- 'Video %s is only available for registered users, '
- 'use --username and --password options to provide account credentials.',
-
- r'<!>Unknown error':
- 'Video %s does not exist.',
-
- r'<!>Видео временно недоступно':
- 'Video %s is temporarily unavailable.',
-
- r'<!>Access denied':
- 'Access denied to video %s.',
-
- r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
- 'Video %s is no longer available, because its author has been blocked.',
-
- r'<!>This video is no longer available, because its author has been blocked.':
- 'Video %s is no longer available, because its author has been blocked.',
-
- r'<!>This video is no longer available, because it has been deleted.':
- 'Video %s is no longer available, because it has been deleted.',
-
- r'<!>The video .+? is not available in your region.':
- 'Video %s is not available in your region.',
- }
-
- for error_re, error_msg in ERRORS.items():
- if re.search(error_re, info_page):
- raise ExtractorError(error_msg % video_id, expected=True)
-
- youtube_url = YoutubeIE._extract_url(info_page)
- if youtube_url:
- return self.url_result(youtube_url, ie=YoutubeIE.ie_key())
-
- vimeo_url = VimeoIE._extract_url(url, info_page)
- if vimeo_url is not None:
- return self.url_result(vimeo_url)
-
- pladform_url = PladformIE._extract_url(info_page)
- if pladform_url:
- return self.url_result(pladform_url)
-
- m_rutube = re.search(
- r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
- if m_rutube is not None:
- rutube_url = self._proto_relative_url(
- m_rutube.group(1).replace('\\', ''))
- return self.url_result(rutube_url)
-
- dailymotion_urls = DailymotionIE._extract_urls(info_page)
- if dailymotion_urls:
- return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
-
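- # Some embeds keep the external player URL in a JS "opts" object;
- # the lookahead skips site-relative paths while allowing protocol-relative URLs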
- m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
- if m_opts:
- m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
- if m_opts_url:
- opts_url = m_opts_url.group(1)
- if opts_url.startswith('//'):
- opts_url = 'http:' + opts_url
- return self.url_result(opts_url)
-
- # vars does not seem to be served anymore since 2016-10-24
- data = self._parse_json(
- self._search_regex(
- r'var\s+vars\s*=\s*({.+?});', info_page, 'vars', default='{}'),
- video_id, fatal=False)
-
- # <!json> is served instead
- if not data:
- data = self._parse_json(
- self._search_regex(
- [r'<!json>\s*({.+?})\s*<!>', r'<!json>\s*({.+})'],
- info_page, 'json', default='{}'),
- video_id)
- if data:
- data = data['player']['params'][0]
-
- if not data:
- data = self._parse_json(
- self._search_regex(
- r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', info_page,
- 'player params', default='{}'),
- video_id)
- if data:
- data = data['params'][0]
-
- # <!--{...}
- if not data:
- data = self._parse_json(
- self._search_regex(
- r'<!--\s*({.+})', info_page, 'payload'),
- video_id)['payload'][-1][-1]['player']['params'][0]
-
- title = unescapeHTML(data['md_title'])
-
- # 2 = live
- # 3 = post live (finished live)
- is_live = data.get('live') == 2
- if is_live:
- title = self._live_title(title)
-
- timestamp = unified_timestamp(self._html_search_regex(
- r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
- 'upload date', default=None)) or int_or_none(data.get('date'))
-
- view_count = str_to_int(self._search_regex(
- r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
- info_page, 'view count', default=None))
-
- formats = []
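- # Progressive formats use keys like url<height>, cache<height>, extra_data,
- # live_mp4 or postlive_mp4; hls and rtmp entries are handled separately below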
- for format_id, format_url in data.items():
- format_url = url_or_none(format_url)
- if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
- continue
- if (format_id.startswith(('url', 'cache'))
- or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
- height = int_or_none(self._search_regex(
- r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
- formats.append({
- 'format_id': format_id,
- 'url': format_url,
- 'height': height,
- })
- elif format_id == 'hls':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', 'm3u8_native',
- m3u8_id=format_id, fatal=False, live=is_live))
- elif format_id == 'rtmp':
- formats.append({
- 'format_id': format_id,
- 'url': format_url,
- 'ext': 'flv',
- })
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'formats': formats,
- 'title': title,
- 'thumbnail': data.get('jpg'),
- 'uploader': data.get('md_author'),
- 'uploader_id': str_or_none(data.get('author_id')),
- 'duration': data.get('duration'),
- 'timestamp': timestamp,
- 'view_count': view_count,
- 'like_count': int_or_none(data.get('liked')),
- 'dislike_count': int_or_none(data.get('nolikes')),
- 'is_live': is_live,
- }
-
-
-class VKUserVideosIE(VKBaseIE):
- IE_NAME = 'vk:uservideos'
- IE_DESC = "VK - User's Videos"
- _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)'
- _TEMPLATE_URL = 'https://vk.com/videos'
- _TESTS = [{
- 'url': 'http://vk.com/videos205387401',
- 'info_dict': {
- 'id': '205387401',
- 'title': "Tom Cruise's Videos",
- },
- 'playlist_mincount': 4,
- }, {
- 'url': 'http://vk.com/videos-77521',
- 'only_matching': True,
- }, {
- 'url': 'http://vk.com/videos-97664626?section=all',
- 'only_matching': True,
- }, {
- 'url': 'http://m.vk.com/videos205387401',
- 'only_matching': True,
- }, {
- 'url': 'http://new.vk.com/videos205387401',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- page_id = self._match_id(url)
-
- webpage = self._download_webpage(url, page_id)
-
- entries = [
- self.url_result(
- 'http://vk.com/video' + video_id, 'VK', video_id=video_id)
- for video_id in orderedSet(re.findall(r'href="/video(-?[0-9_]+)"', webpage))]
-
- title = unescapeHTML(self._search_regex(
- r'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos',
- webpage, 'title', default=page_id))
-
- return self.playlist_result(entries, page_id, title)
-
-
-class VKWallPostIE(VKBaseIE):
- IE_NAME = 'vk:wallpost'
- _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
- _TESTS = [{
- # public page URL, audio playlist
- 'url': 'https://vk.com/bs.official?w=wall-23538238_35',
- 'info_dict': {
- 'id': '23538238_35',
- 'title': 'Black Shadow - Wall post 23538238_35',
- 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
- },
- 'playlist': [{
- 'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
- 'info_dict': {
- 'id': '135220665_111806521',
- 'ext': 'mp3',
- 'title': 'Black Shadow - Слепое Верование',
- 'duration': 370,
- 'uploader': 'Black Shadow',
- 'artist': 'Black Shadow',
- 'track': 'Слепое Верование',
- },
- }, {
- 'md5': '4cc7e804579122b17ea95af7834c9233',
- 'info_dict': {
- 'id': '135220665_111802303',
- 'ext': 'mp3',
- 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
- 'duration': 423,
- 'uploader': 'Black Shadow',
- 'artist': 'Black Shadow',
- 'track': 'Война - Негасимое Бездны Пламя!',
- },
- 'params': {
- 'skip_download': True,
- },
- }],
- 'params': {
- 'usenetrc': True,
- },
- 'skip': 'Requires vk account credentials',
- }, {
- # single YouTube embed, no leading -
- 'url': 'https://vk.com/wall85155021_6319',
- 'info_dict': {
- 'id': '85155021_6319',
- 'title': 'Sergey Gorbunov - Wall post 85155021_6319',
- },
- 'playlist_count': 1,
- 'params': {
- 'usenetrc': True,
- },
- 'skip': 'Requires vk account credentials',
- }, {
- # wall page URL
- 'url': 'https://vk.com/wall-23538238_35',
- 'only_matching': True,
- }, {
- # mobile wall page URL
- 'url': 'https://m.vk.com/wall-23538238_35',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- post_id = self._match_id(url)
-
- wall_url = 'https://vk.com/wall%s' % post_id
-
- post_id = remove_start(post_id, '-')
-
- webpage = self._download_webpage(wall_url, post_id)
-
- error = self._html_search_regex(
- r'>Error</div>\s*<div[^>]+class=["\']body["\'][^>]*>([^<]+)',
- webpage, 'error', default=None)
- if error:
- raise ExtractorError('VK said: %s' % error, expected=True)
-
- description = clean_html(get_element_by_class('wall_post_text', webpage))
- uploader = clean_html(get_element_by_class('author', webpage))
- thumbnail = self._og_search_thumbnail(webpage)
-
- entries = []
-
- audio_ids = re.findall(r'data-full-id=["\'](\d+_\d+)', webpage)
- if audio_ids:
- al_audio = self._download_webpage(
- 'https://vk.com/al_audio.php', post_id,
- note='Downloading audio info', fatal=False,
- data=urlencode_postdata({
- 'act': 'reload_audio',
- 'al': '1',
- 'ids': ','.join(audio_ids)
- }))
- if al_audio:
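- # Each audio entry is a positional array; its first six fields map onto the Audio namedtuple below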
- Audio = collections.namedtuple(
- 'Audio', ['id', 'user_id', 'url', 'track', 'artist', 'duration'])
- audios = self._parse_json(
- self._search_regex(
- r'<!json>(.+?)<!>', al_audio, 'audios', default='[]'),
- post_id, fatal=False, transform_source=unescapeHTML)
- if isinstance(audios, list):
- for audio in audios:
- a = Audio._make(audio[:6])
- entries.append({
- 'id': '%s_%s' % (a.user_id, a.id),
- 'url': a.url,
- 'title': '%s - %s' % (a.artist, a.track) if a.artist and a.track else a.id,
- 'thumbnail': thumbnail,
- 'duration': a.duration,
- 'uploader': uploader,
- 'artist': a.artist,
- 'track': a.track,
- })
-
- for video in re.finditer(
- r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
- entries.append(self.url_result(
- compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))
-
- title = 'Wall post %s' % post_id
-
- return self.playlist_result(
- orderedSet(entries), post_id,
- '%s - %s' % (uploader, title) if uploader else title,
- description)
diff --git a/youtube_dl/extractor/vlive.py b/youtube_dl/extractor/vlive.py
deleted file mode 100644
index c3429f723..000000000
--- a/youtube_dl/extractor/vlive.py
+++ /dev/null
@@ -1,405 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import time
-import itertools
-
-from .common import InfoExtractor
-from ..compat import (
- compat_urllib_parse_urlencode,
- compat_str,
-)
-from ..utils import (
- dict_get,
- ExtractorError,
- float_or_none,
- int_or_none,
- remove_start,
- try_get,
- urlencode_postdata,
-)
-
-
-class VLiveIE(InfoExtractor):
- IE_NAME = 'vlive'
- _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
- _NETRC_MACHINE = 'vlive'
- _TESTS = [{
- 'url': 'http://www.vlive.tv/video/1326',
- 'md5': 'cc7314812855ce56de70a06a27314983',
- 'info_dict': {
- 'id': '1326',
- 'ext': 'mp4',
- 'title': "[V LIVE] Girl's Day's Broadcast",
- 'creator': "Girl's Day",
- 'view_count': int,
- },
- }, {
- 'url': 'http://www.vlive.tv/video/16937',
- 'info_dict': {
- 'id': '16937',
- 'ext': 'mp4',
- 'title': '[V LIVE] 첸백시 걍방',
- 'creator': 'EXO',
- 'view_count': int,
- 'subtitles': 'mincount:12',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.vlive.tv/video/129100',
- 'md5': 'ca2569453b79d66e5b919e5d308bff6b',
- 'info_dict': {
- 'id': '129100',
- 'ext': 'mp4',
- 'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
- 'creator': 'BTS+',
- 'view_count': int,
- 'subtitles': 'mincount:10',
- },
- 'skip': 'This video is only available for CH+ subscribers',
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
-
- def _real_initialize(self):
- self._login()
-
- def _login(self):
- email, password = self._get_login_info()
- if None in (email, password):
- return
-
- def is_logged_in():
- login_info = self._download_json(
- 'https://www.vlive.tv/auth/loginInfo', None,
- note='Downloading login info',
- headers={'Referer': 'https://www.vlive.tv/home'})
- return try_get(
- login_info, lambda x: x['message']['login'], bool) or False
-
- LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
- self._request_webpage(
- LOGIN_URL, None, note='Downloading login cookies')
-
- self._download_webpage(
- LOGIN_URL, None, note='Logging in',
- data=urlencode_postdata({'email': email, 'pwd': password}),
- headers={
- 'Referer': LOGIN_URL,
- 'Content-Type': 'application/x-www-form-urlencoded'
- })
-
- if not is_logged_in():
- raise ExtractorError('Unable to log in', expected=True)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(
- 'https://www.vlive.tv/video/%s' % video_id, video_id)
-
- VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
- VIDEO_PARAMS_FIELD = 'video params'
-
- params = self._parse_json(self._search_regex(
- VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
- transform_source=lambda s: '[' + s + ']', fatal=False)
-
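- # Fall back to naive comma splitting when the init arguments are not valid JSON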
- if not params or len(params) < 7:
- params = self._search_regex(
- VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
- params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
-
- status, long_video_id, key = params[2], params[5], params[6]
- status = remove_start(status, 'PRODUCT_')
-
- if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
- return self._live(video_id, webpage)
- elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
- return self._replay(video_id, webpage, long_video_id, key)
-
- if status == 'LIVE_END':
- raise ExtractorError('Uploading for replay. Please wait...',
- expected=True)
- elif status == 'COMING_SOON':
- raise ExtractorError('Coming soon!', expected=True)
- elif status == 'CANCELED':
- raise ExtractorError('We are sorry, '
- 'but the live broadcast has been canceled.',
- expected=True)
- elif status == 'ONLY_APP':
- raise ExtractorError('Unsupported video type', expected=True)
- else:
- raise ExtractorError('Unknown status %s' % status)
-
- def _get_common_fields(self, webpage):
- title = self._og_search_title(webpage)
- creator = self._html_search_regex(
- r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
- webpage, 'creator', fatal=False)
- thumbnail = self._og_search_thumbnail(webpage)
- return {
- 'title': title,
- 'creator': creator,
- 'thumbnail': thumbnail,
- }
-
- def _live(self, video_id, webpage):
- init_page = self._download_init_page(video_id)
-
- live_params = self._search_regex(
- r'"liveStreamInfo"\s*:\s*(".*"),',
- init_page, 'live stream info')
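- # liveStreamInfo is a JSON string embedded inside JSON, hence the double parse below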
- live_params = self._parse_json(live_params, video_id)
- live_params = self._parse_json(live_params, video_id)
-
- formats = []
- for vid in live_params.get('resolutions', []):
- formats.extend(self._extract_m3u8_formats(
- vid['cdnUrl'], video_id, 'mp4',
- m3u8_id=vid.get('name'),
- fatal=False, live=True))
- self._sort_formats(formats)
-
- info = self._get_common_fields(webpage)
- info.update({
- 'title': self._live_title(info['title']),
- 'id': video_id,
- 'formats': formats,
- 'is_live': True,
- })
- return info
-
- def _replay(self, video_id, webpage, long_video_id, key):
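- # long_video_id or key may be empty strings when the watch page lacks them;
- # in that case fetch both from the init page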
- if '' in (long_video_id, key):
- init_page = self._download_init_page(video_id)
- video_info = self._parse_json(self._search_regex(
- (r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
- r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
- video_id)
- if video_info.get('status') == 'NEED_CHANNEL_PLUS':
- self.raise_login_required(
- 'This video is only available for CH+ subscribers')
- long_video_id, key = video_info['vid'], video_info['inkey']
-
- playinfo = self._download_json(
- 'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
- % compat_urllib_parse_urlencode({
- 'videoId': long_video_id,
- 'key': key,
- 'ptc': 'http',
- 'doct': 'json', # document type (xml or json)
- 'cpt': 'vtt', # captions type (vtt or ttml)
- }), video_id)
-
- formats = [{
- 'url': vid['source'],
- 'format_id': vid.get('encodingOption', {}).get('name'),
- 'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
- 'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
- 'width': int_or_none(vid.get('encodingOption', {}).get('width')),
- 'height': int_or_none(vid.get('encodingOption', {}).get('height')),
- 'filesize': int_or_none(vid.get('size')),
- } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
- self._sort_formats(formats)
-
- view_count = int_or_none(playinfo.get('meta', {}).get('count'))
-
- subtitles = {}
- for caption in playinfo.get('captions', {}).get('list', []):
- lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
- if lang and caption.get('source'):
- subtitles[lang] = [{
- 'ext': 'vtt',
- 'url': caption['source']}]
-
- info = self._get_common_fields(webpage)
- info.update({
- 'id': video_id,
- 'formats': formats,
- 'view_count': view_count,
- 'subtitles': subtitles,
- })
- return info
-
- def _download_init_page(self, video_id):
- return self._download_webpage(
- 'https://www.vlive.tv/video/init/view',
- video_id, note='Downloading live webpage',
- data=urlencode_postdata({'videoSeq': video_id}),
- headers={
- 'Referer': 'https://www.vlive.tv/video/%s' % video_id,
- 'Content-Type': 'application/x-www-form-urlencoded'
- })
-
-
-class VLiveChannelIE(InfoExtractor):
- IE_NAME = 'vlive:channel'
- _VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
- _TEST = {
- 'url': 'http://channels.vlive.tv/FCD4B',
- 'info_dict': {
- 'id': 'FCD4B',
- 'title': 'MAMAMOO',
- },
- 'playlist_mincount': 110
- }
- _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
-
- def _real_extract(self, url):
- channel_code = self._match_id(url)
-
- webpage = self._download_webpage(
- 'http://channels.vlive.tv/%s/video' % channel_code, channel_code)
-
- app_id = None
-
- app_js_url = self._search_regex(
- r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
- webpage, 'app js', default=None, group='url')
-
- if app_js_url:
- app_js = self._download_webpage(
- app_js_url, channel_code, 'Downloading app JS', fatal=False)
- if app_js:
- app_id = self._search_regex(
- r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
- app_js, 'app id', default=None)
-
- app_id = app_id or self._APP_ID
-
- channel_info = self._download_json(
- 'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
- channel_code, note='Downloading decode channel code',
- query={
- 'app_id': app_id,
- 'channelCode': channel_code,
- '_': int(time.time())
- })
-
- channel_seq = channel_info['result']['channelSeq']
- channel_name = None
- entries = []
-
- for page_num in itertools.count(1):
- video_list = self._download_json(
- 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
- channel_code, note='Downloading channel list page #%d' % page_num,
- query={
- 'app_id': app_id,
- 'channelSeq': channel_seq,
- # Large values of maxNumOfRows (~300 or above) may cause
- # empty responses (see [1]); e.g. this happens for [2], which
- # has more than 300 videos.
- # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
- # 2. http://channels.vlive.tv/EDBF.
- 'maxNumOfRows': 100,
- '_': int(time.time()),
- 'pageNo': page_num
- }
- )
-
- if not channel_name:
- channel_name = try_get(
- video_list,
- lambda x: x['result']['channelInfo']['channelName'],
- compat_str)
-
- videos = try_get(
- video_list, lambda x: x['result']['videoList'], list)
- if not videos:
- break
-
- for video in videos:
- video_id = video.get('videoSeq')
- if not video_id:
- continue
- video_id = compat_str(video_id)
- entries.append(
- self.url_result(
- 'http://www.vlive.tv/video/%s' % video_id,
- ie=VLiveIE.ie_key(), video_id=video_id))
-
- return self.playlist_result(
- entries, channel_code, channel_name)
-
-
-class VLivePlaylistIE(InfoExtractor):
- IE_NAME = 'vlive:playlist'
- _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
- _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
- _TESTS = [{
- # regular working playlist
- 'url': 'https://www.vlive.tv/video/117956/playlist/117963',
- 'info_dict': {
- 'id': '117963',
- 'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
- },
- 'playlist_mincount': 10
- }, {
- # playlist with no playlistVideoSeqs
- 'url': 'http://www.vlive.tv/video/22867/playlist/22912',
- 'info_dict': {
- 'id': '22867',
- 'ext': 'mp4',
- 'title': '[V LIVE] Valentine Day Message from MINA',
- 'creator': 'TWICE',
- 'view_count': int
- },
- 'params': {
- 'skip_download': True,
- }
- }]
-
- def _build_video_result(self, video_id, message):
- self.to_screen(message)
- return self.url_result(
- self._VIDEO_URL_TEMPLATE % video_id,
- ie=VLiveIE.ie_key(), video_id=video_id)
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id, playlist_id = mobj.group('video_id', 'id')
-
- if self._downloader.params.get('noplaylist'):
- return self._build_video_result(
- video_id,
- 'Downloading just video %s because of --no-playlist'
- % video_id)
-
- self.to_screen(
- 'Downloading playlist %s - add --no-playlist to just download video'
- % playlist_id)
-
- webpage = self._download_webpage(
- 'http://www.vlive.tv/video/%s/playlist/%s'
- % (video_id, playlist_id), playlist_id)
-
- raw_item_ids = self._search_regex(
- r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
- 'playlist video seqs', default=None, fatal=False)
-
- if not raw_item_ids:
- return self._build_video_result(
- video_id,
- 'Downloading just video %s because no playlist was found'
- % video_id)
-
- item_ids = self._parse_json(raw_item_ids, playlist_id)
-
- entries = [
- self.url_result(
- self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
- video_id=compat_str(item_id))
- for item_id in item_ids]
-
- playlist_name = self._html_search_regex(
- r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
- webpage, 'playlist title', fatal=False)
-
- return self.playlist_result(entries, playlist_id, playlist_name)
diff --git a/youtube_dl/extractor/vodplatform.py b/youtube_dl/extractor/vodplatform.py
deleted file mode 100644
index 239644340..000000000
--- a/youtube_dl/extractor/vodplatform.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import unescapeHTML
-
-
-class VODPlatformIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?vod-platform\.net/[eE]mbed/(?P<id>[^/?#]+)'
- _TEST = {
- # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
- 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
- 'md5': '1db2b7249ce383d6be96499006e951fc',
- 'info_dict': {
- 'id': 'RufMcytHDolTH1MuKHY9Fw',
- 'ext': 'mp4',
- 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"',
- }
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- webpage = self._download_webpage(url, video_id)
-
- title = unescapeHTML(self._og_search_title(webpage))
- hidden_inputs = self._hidden_inputs(webpage)
-
- formats = self._extract_wowza_formats(
- hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil'])
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': hidden_inputs.get('HiddenThumbnail') or self._og_search_thumbnail(webpage),
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/voicerepublic.py b/youtube_dl/extractor/voicerepublic.py
deleted file mode 100644
index 59e1359c4..000000000
--- a/youtube_dl/extractor/voicerepublic.py
+++ /dev/null
@@ -1,100 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import (
- compat_str,
- compat_urlparse,
-)
-from ..utils import (
- ExtractorError,
- determine_ext,
- int_or_none,
- sanitized_Request,
-)
-
-
-class VoiceRepublicIE(InfoExtractor):
- _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
- _TESTS = [{
- 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
- 'md5': 'b9174d651323f17783000876347116e3',
- 'info_dict': {
- 'id': '2296',
- 'display_id': 'watching-the-watchers-building-a-sousveillance-state',
- 'ext': 'm4a',
- 'title': 'Watching the Watchers: Building a Sousveillance State',
- 'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
- 'thumbnail': r're:^https?://.*\.(?:png|jpg)$',
- 'duration': 1800,
- 'view_count': int,
- }
- }, {
- 'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- display_id = self._match_id(url)
-
- req = sanitized_Request(
- compat_urlparse.urljoin(url, '/talks/%s' % display_id))
- # Older versions of Firefox get redirected to an "upgrade browser" page
- req.add_header('User-Agent', 'youtube-dl')
- webpage = self._download_webpage(req, display_id)
-
- if '>Queued for processing, please stand by...<' in webpage:
- raise ExtractorError(
- 'Audio is still queued for processing', expected=True)
-
- config = self._search_regex(
- r'(?s)return ({.+?});\s*\n', webpage,
- 'data', default=None)
- data = self._parse_json(config, display_id, fatal=False) if config else None
- if data:
- title = data['title']
- description = data.get('teaser')
- talk_id = compat_str(data.get('talk_id') or display_id)
- talk = data['talk']
- duration = int_or_none(talk.get('duration'))
- formats = [{
- 'url': compat_urlparse.urljoin(url, talk_url),
- 'format_id': format_id,
- 'ext': determine_ext(talk_url) or format_id,
- 'vcodec': 'none',
- } for format_id, talk_url in talk['links'].items()]
- else:
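- # No JSON config found - fall back to scraping the jPlayer markup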
- title = self._og_search_title(webpage)
- description = self._html_search_regex(
- r"(?s)<div class='talk-teaser'[^>]*>(.+?)</div>",
- webpage, 'description', fatal=False)
- talk_id = self._search_regex(
- [r"id='jc-(\d+)'", r"data-shareable-id='(\d+)'"],
- webpage, 'talk id', default=None) or display_id
- duration = None
- player = self._search_regex(
- r"class='vr-player jp-jplayer'([^>]+)>", webpage, 'player')
- formats = [{
- 'url': compat_urlparse.urljoin(url, talk_url),
- 'format_id': format_id,
- 'ext': determine_ext(talk_url) or format_id,
- 'vcodec': 'none',
- } for format_id, talk_url in re.findall(r"data-([^=]+)='([^']+)'", player)]
- self._sort_formats(formats)
-
- thumbnail = self._og_search_thumbnail(webpage)
- view_count = int_or_none(self._search_regex(
- r"class='play-count[^']*'>\s*(\d+) plays",
- webpage, 'play count', fatal=False))
-
- return {
- 'id': talk_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'view_count': view_count,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py
deleted file mode 100644
index 3336e6c15..000000000
--- a/youtube_dl/extractor/vzaar.py
+++ /dev/null
@@ -1,95 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- int_or_none,
- float_or_none,
- unified_timestamp,
- url_or_none,
-)
-
-
-class VzaarIE(InfoExtractor):
- _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
- _TESTS = [{
- # HTTP and HLS
- 'url': 'https://vzaar.com/videos/1152805',
- 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
- 'info_dict': {
- 'id': '1152805',
- 'ext': 'mp4',
- 'title': 'sample video (public)',
- },
- }, {
- 'url': 'https://view.vzaar.com/27272/player',
- 'md5': '3b50012ac9bbce7f445550d54e0508f2',
- 'info_dict': {
- 'id': '27272',
- 'ext': 'mp3',
- 'title': 'MP3',
- },
- }, {
- # with null videoTitle
- 'url': 'https://view.vzaar.com/20313539/download',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return re.findall(
- r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
- webpage)
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- video_data = self._download_json(
- 'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
-
- title = video_data.get('videoTitle') or video_id
-
- formats = []
-
- source_url = url_or_none(video_data.get('sourceUrl'))
- if source_url:
- f = {
- 'url': source_url,
- 'format_id': 'http',
- }
- if 'audio' in source_url:
- f.update({
- 'vcodec': 'none',
- 'ext': 'mp3',
- })
- else:
- f.update({
- 'width': int_or_none(video_data.get('width')),
- 'height': int_or_none(video_data.get('height')),
- 'ext': 'mp4',
- 'fps': float_or_none(video_data.get('fps')),
- })
- formats.append(f)
-
- video_guid = video_data.get('guid')
- usp = video_data.get('usp')
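- # usp parameters are appended to the HLS manifest URL as query arguments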
- if isinstance(video_guid, compat_str) and isinstance(usp, dict):
- m3u8_url = ('http://fable.vzaar.com/v4/usp/%s/%s.ism/.m3u8?'
- % (video_guid, video_id)) + '&'.join(
- '%s=%s' % (k, v) for k, v in usp.items())
- formats.extend(self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': self._proto_relative_url(video_data.get('poster')),
- 'duration': float_or_none(video_data.get('videoDuration')),
- 'timestamp': unified_timestamp(video_data.get('ts')),
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py
deleted file mode 100644
index fa142b974..000000000
--- a/youtube_dl/extractor/wistia.py
+++ /dev/null
@@ -1,126 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- ExtractorError,
- int_or_none,
- float_or_none,
- unescapeHTML,
-)
-
-
-class WistiaIE(InfoExtractor):
- _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)'
- _API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
- _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
-
- _TESTS = [{
- 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
- 'md5': 'cafeb56ec0c53c18c97405eecb3133df',
- 'info_dict': {
- 'id': 'sh7fpupwlt',
- 'ext': 'mov',
- 'title': 'Being Resourceful',
- 'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
- 'upload_date': '20131204',
- 'timestamp': 1386185018,
- 'duration': 117,
- },
- }, {
- 'url': 'wistia:sh7fpupwlt',
- 'only_matching': True,
- }, {
- # with hls video
- 'url': 'wistia:807fafadvk',
- 'only_matching': True,
- }, {
- 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
- 'only_matching': True,
- }, {
- 'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_url(webpage):
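- # Try plain iframe/meta embeds first, then JS API embeds, then async div embeds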
- match = re.search(
- r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage)
- if match:
- return unescapeHTML(match.group('url'))
-
- match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
- if match:
- return 'wistia:%s' % match.group('id')
-
- match = re.search(
- r'''(?sx)
- <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
- <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
- ''', webpage)
- if match:
- return 'wistia:%s' % match.group('id')
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- data_json = self._download_json(
- self._API_URL % video_id, video_id,
- # Some videos require a Referer header.
- headers={
- 'Referer': url if url.startswith('http') else self._IFRAME_URL % video_id,
- })
-
- if data_json.get('error'):
- raise ExtractorError(
- 'Error while getting the playlist', expected=True)
-
- data = data_json['media']
- title = data['name']
-
- formats = []
- thumbnails = []
- for a in data['assets']:
- aurl = a.get('url')
- if not aurl:
- continue
- astatus = a.get('status')
- atype = a.get('type')
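- # Skip assets with an unexpected status (anything but 2) as well as preview/storyboard images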
- if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
- continue
- elif atype in ('still', 'still_image'):
- thumbnails.append({
- 'url': aurl,
- 'width': int_or_none(a.get('width')),
- 'height': int_or_none(a.get('height')),
- })
- else:
- aext = a.get('ext')
- is_m3u8 = a.get('container') == 'm3u8' or aext == 'm3u8'
- formats.append({
- 'format_id': atype,
- 'url': aurl,
- 'tbr': int_or_none(a.get('bitrate')),
- 'vbr': int_or_none(a.get('opt_vbitrate')),
- 'width': int_or_none(a.get('width')),
- 'height': int_or_none(a.get('height')),
- 'filesize': int_or_none(a.get('size')),
- 'vcodec': a.get('codec'),
- 'container': a.get('container'),
- 'ext': 'mp4' if is_m3u8 else aext,
- 'protocol': 'm3u8' if is_m3u8 else None,
- 'preference': 1 if atype == 'original' else None,
- })
-
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': data.get('seoDescription'),
- 'formats': formats,
- 'thumbnails': thumbnails,
- 'duration': float_or_none(data.get('duration')),
- 'timestamp': int_or_none(data.get('createdAt')),
- }
diff --git a/youtube_dl/extractor/xfileshare.py b/youtube_dl/extractor/xfileshare.py
deleted file mode 100644
index b38c7a7b3..000000000
--- a/youtube_dl/extractor/xfileshare.py
+++ /dev/null
@@ -1,213 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- decode_packed_codes,
- determine_ext,
- ExtractorError,
- int_or_none,
- NO_DEFAULT,
- urlencode_postdata,
-)
-
-
-class XFileShareIE(InfoExtractor):
- _SITES = (
- (r'daclips\.(?:in|com)', 'DaClips'),
- (r'filehoot\.com', 'FileHoot'),
- (r'gorillavid\.(?:in|com)', 'GorillaVid'),
- (r'movpod\.in', 'MovPod'),
- (r'powerwatch\.pw', 'PowerWatch'),
- (r'rapidvideo\.ws', 'Rapidvideo.ws'),
- (r'thevideobee\.to', 'TheVideoBee'),
- (r'vidto\.(?:me|se)', 'Vidto'),
- (r'streamin\.to', 'Streamin.To'),
- (r'xvidstage\.com', 'XVIDSTAGE'),
- (r'vidabc\.com', 'Vid ABC'),
- (r'vidbom\.com', 'VidBom'),
- (r'vidlo\.us', 'vidlo'),
- (r'rapidvideo\.(?:cool|org)', 'RapidVideo.TV'),
- (r'fastvideo\.me', 'FastVideo.me'),
- )
-
- IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
- _VALID_URL = (r'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
- % '|'.join(site for site in list(zip(*_SITES))[0]))
-
- _FILE_NOT_FOUND_REGEXES = (
- r'>(?:404 - )?File Not Found<',
- r'>The file was removed by administrator<',
- )
-
- _TESTS = [{
- 'url': 'http://gorillavid.in/06y9juieqpmi',
- 'md5': '5ae4a3580620380619678ee4875893ba',
- 'info_dict': {
- 'id': '06y9juieqpmi',
- 'ext': 'mp4',
- 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
- 'thumbnail': r're:http://.*\.jpg',
- },
- }, {
- 'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
- 'only_matching': True,
- }, {
- 'url': 'http://daclips.in/3rso4kdn6f9m',
- 'md5': '1ad8fd39bb976eeb66004d3a4895f106',
- 'info_dict': {
- 'id': '3rso4kdn6f9m',
- 'ext': 'mp4',
- 'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
- 'thumbnail': r're:http://.*\.jpg',
- }
- }, {
- 'url': 'http://movpod.in/0wguyyxi1yca',
- 'only_matching': True,
- }, {
- 'url': 'http://filehoot.com/3ivfabn7573c.html',
- 'info_dict': {
- 'id': '3ivfabn7573c',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
- 'thumbnail': r're:http://.*\.jpg',
- },
- 'skip': 'Video removed',
- }, {
- 'url': 'http://vidto.me/ku5glz52nqe1.html',
- 'info_dict': {
- 'id': 'ku5glz52nqe1',
- 'ext': 'mp4',
- 'title': 'test'
- }
- }, {
- 'url': 'http://powerwatch.pw/duecjibvicbu',
- 'info_dict': {
- 'id': 'duecjibvicbu',
- 'ext': 'mp4',
- 'title': 'Big Buck Bunny trailer',
- },
- }, {
- 'url': 'http://xvidstage.com/e0qcnl03co6z',
- 'info_dict': {
- 'id': 'e0qcnl03co6z',
- 'ext': 'mp4',
- 'title': 'Chucky Prank 2015.mp4',
- },
- }, {
- # removed by administrator
- 'url': 'http://xvidstage.com/amfy7atlkx25',
- 'only_matching': True,
- }, {
- 'url': 'http://vidabc.com/i8ybqscrphfv',
- 'info_dict': {
- 'id': 'i8ybqscrphfv',
- 'ext': 'mp4',
- 'title': 're:Beauty and the Beast 2017',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.rapidvideo.cool/b667kprndr8w',
- 'only_matching': True,
- }, {
- 'url': 'http://www.fastvideo.me/k8604r8nk8sn/FAST_FURIOUS_8_-_Trailer_italiano_ufficiale.mp4.html',
- 'only_matching': True,
- }, {
- 'url': 'http://vidto.se/1tx1pf6t12cg.html',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
- % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
- webpage)]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
-
- url = 'http://%s/%s' % (mobj.group('host'), video_id)
- webpage = self._download_webpage(url, video_id)
-
- if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
- raise ExtractorError('Video %s does not exist' % video_id, expected=True)
-
- fields = self._hidden_inputs(webpage)
-
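- # Some sites gate the file behind a countdown form: wait it out, then re-submit the hidden inputs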
- if fields.get('op') == 'download1':
- countdown = int_or_none(self._search_regex(
- r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
- webpage, 'countdown', default=None))
- if countdown:
- self._sleep(countdown, video_id)
-
- webpage = self._download_webpage(
- url, video_id, 'Downloading video page',
- data=urlencode_postdata(fields), headers={
- 'Referer': url,
- 'Content-type': 'application/x-www-form-urlencoded',
- })
-
- title = (self._search_regex(
- (r'style="z-index: [0-9]+;">([^<]+)</span>',
- r'<td nowrap>([^<]+)</td>',
- r'h4-fine[^>]*>([^<]+)<',
- r'>Watch (.+) ',
- r'<h2 class="video-page-head">([^<]+)</h2>',
- r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to
- webpage, 'title', default=None) or self._og_search_title(
- webpage, default=None) or video_id).strip()
-
- def extract_formats(default=NO_DEFAULT):
- urls = []
- for regex in (
- r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
- r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
- r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
- r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
- for mobj in re.finditer(regex, webpage):
- video_url = mobj.group('url')
- if video_url not in urls:
- urls.append(video_url)
- formats = []
- for video_url in urls:
- if determine_ext(video_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- video_url, video_id, 'mp4',
- entry_protocol='m3u8_native', m3u8_id='hls',
- fatal=False))
- else:
- formats.append({
- 'url': video_url,
- 'format_id': 'sd',
- })
- if not formats and default is not NO_DEFAULT:
- return default
- self._sort_formats(formats)
- return formats
-
- formats = extract_formats(default=None)
-
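- # No plain URLs found: the player config may be hidden in P.A.C.K.E.R. obfuscated JavaScript, so unpack it and retry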
- if not formats:
- webpage = decode_packed_codes(self._search_regex(
- r"(}\('(.+)',(\d+),(\d+),'[^']*\b(?:file|embed)\b[^']*'\.split\('\|'\))",
- webpage, 'packed code'))
- formats = extract_formats()
-
- thumbnail = self._search_regex(
- r'image\s*:\s*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/xhamster.py b/youtube_dl/extractor/xhamster.py
deleted file mode 100644
index a5b94d279..000000000
--- a/youtube_dl/extractor/xhamster.py
+++ /dev/null
@@ -1,381 +0,0 @@
-from __future__ import unicode_literals
-
-import itertools
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- clean_html,
- determine_ext,
- dict_get,
- extract_attributes,
- ExtractorError,
- int_or_none,
- parse_duration,
- try_get,
- unified_strdate,
- url_or_none,
-)
-
-
-class XHamsterIE(InfoExtractor):
- _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster[27]\.com)'
- _VALID_URL = r'''(?x)
- https?://
- (?:.+?\.)?%s/
- (?:
- movies/(?P<id>\d+)/(?P<display_id>[^/]*)\.html|
- videos/(?P<display_id_2>[^/]*)-(?P<id_2>\d+)
- )
- ''' % _DOMAINS
- _TESTS = [{
- 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
- 'md5': '98b4687efb1ffd331c4197854dc09e8f',
- 'info_dict': {
- 'id': '1509445',
- 'display_id': 'femaleagent-shy-beauty-takes-the-bait',
- 'ext': 'mp4',
- 'title': 'FemaleAgent Shy beauty takes the bait',
- 'timestamp': 1350194821,
- 'upload_date': '20121014',
- 'uploader': 'Ruseful2011',
- 'duration': 893,
- 'age_limit': 18,
- },
- }, {
- 'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
- 'info_dict': {
- 'id': '2221348',
- 'display_id': 'britney-spears-sexy-booty',
- 'ext': 'mp4',
- 'title': 'Britney Spears Sexy Booty',
- 'timestamp': 1379123460,
- 'upload_date': '20130914',
- 'uploader': 'jojo747400',
- 'duration': 200,
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # empty SEO slug, unavailable via the new URL schema
- 'url': 'http://xhamster.com/movies/5667973/.html',
- 'info_dict': {
- 'id': '5667973',
- 'ext': 'mp4',
- 'title': '....',
- 'timestamp': 1454948101,
- 'upload_date': '20160208',
- 'uploader': 'parejafree',
- 'duration': 72,
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # mobile site
- 'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111',
- 'only_matching': True,
- }, {
- 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
- 'only_matching': True,
- }, {
- # This video is visible to marcoalfa123456's friends only
- 'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',
- 'only_matching': True,
- }, {
- # new URL schema
- 'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
- 'only_matching': True,
- }, {
- 'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
- 'only_matching': True,
- }, {
- 'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
- 'only_matching': True,
- }, {
- 'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
- 'only_matching': True,
- }, {
- 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
- 'only_matching': True,
- }, {
- 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id') or mobj.group('id_2')
- display_id = mobj.group('display_id') or mobj.group('display_id_2')
-
- desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
- webpage = self._download_webpage(desktop_url, video_id)
-
- error = self._html_search_regex(
- r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
- webpage, 'error', default=None)
- if error:
- raise ExtractorError(error, expected=True)
-
- age_limit = self._rta_search(webpage)
-
- def get_height(s):
- return int_or_none(self._search_regex(
- r'^(\d+)[pP]', s, 'height', default=None))
-
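- # The new layout ships the video model as JSON in window.initials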
- initials = self._parse_json(
- self._search_regex(
- r'window\.initials\s*=\s*({.+?})\s*;\s*\n', webpage, 'initials',
- default='{}'),
- video_id, fatal=False)
- if initials:
- video = initials['videoModel']
- title = video['title']
- formats = []
- for format_id, formats_dict in video['sources'].items():
- if not isinstance(formats_dict, dict):
- continue
- for quality, format_item in formats_dict.items():
- if format_id == 'download':
- # Download link takes some time to be generated,
- # skipping for now
- continue
- if not isinstance(format_item, dict):
- continue
- format_url = format_item.get('link')
- filesize = int_or_none(
- format_item.get('size'), invscale=1000000)
- else:
- format_url = format_item
- filesize = None
- format_url = url_or_none(format_url)
- if not format_url:
- continue
- formats.append({
- 'format_id': '%s-%s' % (format_id, quality),
- 'url': format_url,
- 'ext': determine_ext(format_url, 'mp4'),
- 'height': get_height(quality),
- 'filesize': filesize,
- })
- self._sort_formats(formats)
-
- categories_list = video.get('categories')
- if isinstance(categories_list, list):
- categories = []
- for c in categories_list:
- if not isinstance(c, dict):
- continue
- c_name = c.get('name')
- if isinstance(c_name, compat_str):
- categories.append(c_name)
- else:
- categories = None
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': video.get('description'),
- 'timestamp': int_or_none(video.get('created')),
- 'uploader': try_get(
- video, lambda x: x['author']['name'], compat_str),
- 'thumbnail': video.get('thumbURL'),
- 'duration': int_or_none(video.get('duration')),
- 'view_count': int_or_none(video.get('views')),
- 'like_count': int_or_none(try_get(
- video, lambda x: x['rating']['likes'], int)),
- 'dislike_count': int_or_none(try_get(
- video, lambda x: x['rating']['dislikes'], int)),
- 'comment_count': int_or_none(try_get(
- video, lambda x: x['comments']['total'], int)),
- 'age_limit': age_limit,
- 'categories': categories,
- 'formats': formats,
- }
-
- # Old layout fallback
-
- title = self._html_search_regex(
- [r'<h1[^>]*>([^<]+)</h1>',
- r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
- r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
- webpage, 'title')
-
- formats = []
- format_urls = set()
-
- sources = self._parse_json(
- self._search_regex(
- r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources',
- default='{}'),
- video_id, fatal=False)
- for format_id, format_url in (sources or {}).items():
- format_url = url_or_none(format_url)
- if not format_url:
- continue
- if format_url in format_urls:
- continue
- format_urls.add(format_url)
- formats.append({
- 'format_id': format_id,
- 'url': format_url,
- 'height': get_height(format_id),
- })
-
- video_url = self._search_regex(
- [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
- r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
- r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
- webpage, 'video url', group='mp4', default=None)
- if video_url and video_url not in format_urls:
- formats.append({
- 'url': video_url,
- })
-
- self._sort_formats(formats)
-
- # Only a few videos have a description
- mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
- description = mobj.group(1) if mobj else None
-
- upload_date = unified_strdate(self._search_regex(
- r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}',
- webpage, 'upload date', fatal=False))
-
- uploader = self._html_search_regex(
- r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
- webpage, 'uploader', default='anonymous')
-
- thumbnail = self._search_regex(
- [r'''["']thumbUrl["']\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
- r'''<video[^>]+"poster"=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
- webpage, 'thumbnail', fatal=False, group='thumbnail')
-
- duration = parse_duration(self._search_regex(
- [r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']',
- r'Runtime:\s*</span>\s*([\d:]+)'], webpage,
- 'duration', fatal=False))
-
- view_count = int_or_none(self._search_regex(
- r'content=["\']User(?:View|Play)s:(\d+)',
- webpage, 'view count', fatal=False))
-
- mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
- (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
-
- mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
- comment_count = mobj.group('commentcount') if mobj else 0
-
- categories_html = self._search_regex(
- r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
- 'categories', default=None)
- categories = [clean_html(category) for category in re.findall(
- r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'upload_date': upload_date,
- 'uploader': uploader,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'view_count': view_count,
- 'like_count': int_or_none(like_count),
- 'dislike_count': int_or_none(dislike_count),
- 'comment_count': int_or_none(comment_count),
- 'age_limit': age_limit,
- 'categories': categories,
- 'formats': formats,
- }
-
-
-class XHamsterEmbedIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
- _TEST = {
- 'url': 'http://xhamster.com/xembed.php?video=3328539',
- 'info_dict': {
- 'id': '3328539',
- 'ext': 'mp4',
- 'title': 'Pen Masturbation',
- 'timestamp': 1406581861,
- 'upload_date': '20140728',
- 'uploader': 'ManyakisArt',
- 'duration': 5,
- 'age_limit': 18,
- }
- }
-
- @staticmethod
- def _extract_urls(webpage):
- return [url for _, url in re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
- webpage)]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- video_url = self._search_regex(
- r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id),
- webpage, 'xhamster url', default=None)
-
- if not video_url:
- vars = self._parse_json(
- self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),
- video_id)
- video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
-
- return self.url_result(video_url, 'XHamster')
-
-
-class XHamsterUserIE(InfoExtractor):
- _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
- _TESTS = [{
- # Paginated user profile
- 'url': 'https://xhamster.com/users/netvideogirls/videos',
- 'info_dict': {
- 'id': 'netvideogirls',
- },
- 'playlist_mincount': 267,
- }, {
- # Non-paginated user profile
- 'url': 'https://xhamster.com/users/firatkaan/videos',
- 'info_dict': {
- 'id': 'firatkaan',
- },
- 'playlist_mincount': 1,
- }]
-
- def _entries(self, user_id):
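- # Walk the paginated video list, following the data-page="next" link until there is none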
- next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
- for pagenum in itertools.count(1):
- page = self._download_webpage(
- next_page_url, user_id, 'Downloading page %s' % pagenum)
- for video_tag in re.findall(
- r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
- page):
- video = extract_attributes(video_tag)
- video_url = url_or_none(video.get('href'))
- if not video_url or not XHamsterIE.suitable(video_url):
- continue
- video_id = XHamsterIE._match_id(video_url)
- yield self.url_result(
- video_url, ie=XHamsterIE.ie_key(), video_id=video_id)
- mobj = re.search(r'<a[^>]+data-page=["\']next[^>]+>', page)
- if not mobj:
- break
- next_page = extract_attributes(mobj.group(0))
- next_page_url = url_or_none(next_page.get('href'))
- if not next_page_url:
- break
-
- def _real_extract(self, url):
- user_id = self._match_id(url)
- return self.playlist_result(self._entries(user_id), user_id)
diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py
deleted file mode 100644
index c6c0b3291..000000000
--- a/youtube_dl/extractor/xtube.py
+++ /dev/null
@@ -1,180 +0,0 @@
-from __future__ import unicode_literals
-
-import itertools
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- js_to_json,
- orderedSet,
- parse_duration,
- sanitized_Request,
- str_to_int,
-)
-
-
-class XTubeIE(InfoExtractor):
- _VALID_URL = r'''(?x)
- (?:
- xtube:|
- https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-)
- )
- (?P<id>[^/?&#]+)
- '''
-
- _TESTS = [{
- # old URL schema
- 'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
- 'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
- 'info_dict': {
- 'id': 'kVTUy_G222_',
- 'ext': 'mp4',
- 'title': 'strange erotica',
- 'description': 'contains:an ET kind of thing',
- 'uploader': 'greenshowers',
- 'duration': 450,
- 'view_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- }
- }, {
- # FLV videos with duplicated formats
- 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
- 'md5': 'a406963eb349dd43692ec54631efd88b',
- 'info_dict': {
- 'id': '9299752',
- 'display_id': 'A-Super-Run-Part-1-YT',
- 'ext': 'flv',
- 'title': 'A Super Run - Part 1 (YT)',
- 'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93',
- 'uploader': 'tshirtguy59',
- 'duration': 579,
- 'view_count': int,
- 'comment_count': int,
- 'age_limit': 18,
- },
- }, {
- # new URL schema
- 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
- 'only_matching': True,
- }, {
- 'url': 'xtube:625837',
- 'only_matching': True,
- }, {
- 'url': 'xtube:kVTUy_G222_',
- 'only_matching': True,
- }, {
- 'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- if not display_id:
- display_id = video_id
-
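- # Short numeric IDs use the new video-watch URL schema; everything else falls back to the old watch.php one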
- if video_id.isdigit() and len(video_id) < 11:
- url_pattern = 'http://www.xtube.com/video-watch/-%s'
- else:
- url_pattern = 'http://www.xtube.com/watch.php?v=%s'
-
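- # Pre-set consent cookies to bypass the age gate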
- webpage = self._download_webpage(
- url_pattern % video_id, display_id, headers={
- 'Cookie': 'age_verified=1; cookiesAccepted=1',
- })
-
- sources = self._parse_json(self._search_regex(
- r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
- webpage, 'sources', group='sources'), video_id,
- transform_source=js_to_json)
-
- formats = []
- for format_id, format_url in sources.items():
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'height': int_or_none(format_id),
- })
- self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
-
- title = self._search_regex(
- (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
- webpage, 'title', group='title')
- description = self._search_regex(
- r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
- uploader = self._search_regex(
- (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
- r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
- webpage, 'uploader', fatal=False)
- duration = parse_duration(self._search_regex(
- r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
- webpage, 'duration', fatal=False))
- view_count = str_to_int(self._search_regex(
- r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>',
- webpage, 'view count', fatal=False))
- comment_count = str_to_int(self._html_search_regex(
- r'>Comments? \(([\d,\.]+)\)<',
- webpage, 'comment count', fatal=False))
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'uploader': uploader,
- 'duration': duration,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'age_limit': 18,
- 'formats': formats,
- }
-
-
-class XTubeUserIE(InfoExtractor):
- IE_DESC = 'XTube user profile'
- _VALID_URL = r'https?://(?:www\.)?xtube\.com/profile/(?P<id>[^/]+-\d+)'
- _TEST = {
- 'url': 'http://www.xtube.com/profile/greenshowers-4056496',
- 'info_dict': {
- 'id': 'greenshowers-4056496',
- 'age_limit': 18,
- },
- 'playlist_mincount': 155,
- }
-
- def _real_extract(self, url):
- user_id = self._match_id(url)
-
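- # The profile endpoint returns JSON pages of rendered HTML; harvest video IDs until pageCount is exhausted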
- entries = []
- for pagenum in itertools.count(1):
- request = sanitized_Request(
- 'http://www.xtube.com/profile/%s/videos/%d' % (user_id, pagenum),
- headers={
- 'Cookie': 'popunder=4',
- 'X-Requested-With': 'XMLHttpRequest',
- 'Referer': url,
- })
-
- page = self._download_json(
- request, user_id, 'Downloading videos JSON page %d' % pagenum)
-
- html = page.get('html')
- if not html:
- break
-
- for video_id in orderedSet([video_id for _, video_id in re.findall(
- r'data-plid=(["\'])(.+?)\1', html)]):
- entries.append(self.url_result('xtube:%s' % video_id, XTubeIE.ie_key()))
-
- page_count = int_or_none(page.get('pageCount'))
- if not page_count or pagenum == page_count:
- break
-
- playlist = self.playlist_result(entries, user_id)
- playlist['age_limit'] = 18
- return playlist
diff --git a/youtube_dl/extractor/yahoo.py b/youtube_dl/extractor/yahoo.py
deleted file mode 100644
index e5ebdd180..000000000
--- a/youtube_dl/extractor/yahoo.py
+++ /dev/null
@@ -1,689 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import hashlib
-import itertools
-import json
-import re
-
-from .common import InfoExtractor, SearchInfoExtractor
-from ..compat import (
- compat_str,
- compat_urllib_parse,
- compat_urlparse,
-)
-from ..utils import (
- clean_html,
- determine_ext,
- ExtractorError,
- extract_attributes,
- int_or_none,
- mimetype2ext,
- smuggle_url,
- try_get,
- unescapeHTML,
- url_or_none,
-)
-
-from .brightcove import (
- BrightcoveLegacyIE,
- BrightcoveNewIE,
-)
-from .nbc import NBCSportsVPlayerIE
-
-
-class YahooIE(InfoExtractor):
- IE_DESC = 'Yahoo screen and movies'
- _VALID_URL = r'(?P<host>https?://(?:(?P<country>[a-zA-Z]{2})\.)?[\da-zA-Z_-]+\.yahoo\.com)/(?:[^/]+/)*(?:(?P<display_id>.+)?-)?(?P<id>[0-9]+)(?:-[a-z]+)?(?:\.html)?'
- _TESTS = [
- {
- 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
- 'info_dict': {
- 'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
- 'ext': 'mp4',
- 'title': 'Julian Smith & Travis Legg Watch Julian Smith',
- 'description': 'Julian and Travis watch Julian Smith',
- 'duration': 6863,
- },
- },
- {
- 'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
- 'md5': '251af144a19ebc4a033e8ba91ac726bb',
- 'info_dict': {
- 'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
- 'ext': 'mp4',
- 'title': 'Codefellas - The Cougar Lies with Spanish Moss',
- 'description': 'md5:66b627ab0a282b26352136ca96ce73c1',
- 'duration': 151,
- },
- 'skip': 'HTTP Error 404',
- },
- {
- 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
- 'md5': '7993e572fac98e044588d0b5260f4352',
- 'info_dict': {
- 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
- 'ext': 'mp4',
- 'title': "Yahoo Saves 'Community'",
- 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
- 'duration': 170,
- }
- },
- {
- 'url': 'https://tw.news.yahoo.com/%E6%95%A2%E5%95%8F%E5%B8%82%E9%95%B7%20%E9%BB%83%E7%A7%80%E9%9C%9C%E6%89%B9%E8%B3%B4%E6%B8%85%E5%BE%B7%20%E9%9D%9E%E5%B8%B8%E9%AB%98%E5%82%B2-034024051.html',
- 'md5': '45c024bad51e63e9b6f6fad7a43a8c23',
- 'info_dict': {
- 'id': 'cac903b3-fcf4-3c14-b632-643ab541712f',
- 'ext': 'mp4',
- 'title': '敢問市長/黃秀霜批賴清德「非常高傲」',
- 'description': '直言台南沒捷運 交通居五都之末',
- 'duration': 396,
- },
- },
- {
- 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
- 'md5': '71298482f7c64cbb7fa064e4553ff1c1',
- 'info_dict': {
- 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
- 'ext': 'webm',
- 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
- 'description': 'md5:f66c890e1490f4910a9953c941dee944',
- 'duration': 97,
- }
- },
- {
- 'url': 'https://ca.sports.yahoo.com/video/program-makes-hockey-more-affordable-013127711.html',
- 'md5': '57e06440778b1828a6079d2f744212c4',
- 'info_dict': {
- 'id': 'c9fa2a36-0d4d-3937-b8f6-cc0fb1881e73',
- 'ext': 'mp4',
- 'title': 'Program that makes hockey more affordable not offered in Manitoba',
- 'description': 'md5:c54a609f4c078d92b74ffb9bf1f496f4',
- 'duration': 121,
- },
- 'skip': 'Video gone',
- }, {
- 'url': 'https://ca.finance.yahoo.com/news/hackers-sony-more-trouble-well-154609075.html',
- 'info_dict': {
- 'id': '154609075',
- },
- 'playlist': [{
- 'md5': '000887d0dc609bc3a47c974151a40fb8',
- 'info_dict': {
- 'id': 'e624c4bc-3389-34de-9dfc-025f74943409',
- 'ext': 'mp4',
- 'title': '\'The Interview\' TV Spot: War',
- 'description': 'The Interview',
- 'duration': 30,
- },
- }, {
- 'md5': '81bc74faf10750fe36e4542f9a184c66',
- 'info_dict': {
- 'id': '1fc8ada0-718e-3abe-a450-bf31f246d1a9',
- 'ext': 'mp4',
- 'title': '\'The Interview\' TV Spot: Guys',
- 'description': 'The Interview',
- 'duration': 30,
- },
- }],
- }, {
- 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
- 'md5': '88e209b417f173d86186bef6e4d1f160',
- 'info_dict': {
- 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
- 'ext': 'mp4',
- 'title': 'China Moses Is Crazy About the Blues',
- 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
- 'duration': 128,
- }
- }, {
- 'url': 'https://in.lifestyle.yahoo.com/video/connect-dots-dark-side-virgo-090247395.html',
- 'md5': 'd9a083ccf1379127bf25699d67e4791b',
- 'info_dict': {
- 'id': '52aeeaa3-b3d1-30d8-9ef8-5d0cf05efb7c',
- 'ext': 'mp4',
- 'title': 'Connect the Dots: Dark Side of Virgo',
- 'description': 'md5:1428185051cfd1949807ad4ff6d3686a',
- 'duration': 201,
- },
- 'skip': 'Domain name in.lifestyle.yahoo.com gone',
- }, {
- 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
- 'md5': '989396ae73d20c6f057746fb226aa215',
- 'info_dict': {
- 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
- 'ext': 'mp4',
- 'title': '\'True Story\' Trailer',
- 'description': 'True Story',
- 'duration': 150,
- },
- }, {
- 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
- 'only_matching': True,
- }, {
- 'note': 'NBC Sports embeds',
- 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
- 'info_dict': {
- 'id': '9CsDKds0kvHI',
- 'ext': 'flv',
- 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
- 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
- 'upload_date': '20150313',
- 'uploader': 'NBCU-SPORTS',
- 'timestamp': 1426270238,
- }
- }, {
- 'url': 'https://tw.news.yahoo.com/-100120367.html',
- 'only_matching': True,
- }, {
- # Query result is embedded in the webpage, but an explicit request to the video API fails with geo restriction
- 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
- 'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
- 'info_dict': {
- 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
- 'ext': 'mp4',
- 'title': 'Communitary - Community Episode 1: Ladders',
- 'description': 'md5:8fc39608213295748e1e289807838c97',
- 'duration': 1646,
- },
- }, {
- # it uses an alias to get the video_id
- 'url': 'https://www.yahoo.com/movies/the-stars-of-daddys-home-have-very-different-212843197.html',
- 'info_dict': {
- 'id': '40eda9c8-8e5f-3552-8745-830f67d0c737',
- 'ext': 'mp4',
- 'title': 'Will Ferrell & Mark Wahlberg Are Pro-Spanking',
- 'description': 'While they play feuding fathers in \'Daddy\'s Home,\' star Will Ferrell & Mark Wahlberg share their true feelings on parenthood.',
- },
- },
- {
- # config['models']['applet_model']['data']['sapi'] has no query
- 'url': 'https://www.yahoo.com/music/livenation/event/galactic-2016',
- 'md5': 'dac0c72d502bc5facda80c9e6d5c98db',
- 'info_dict': {
- 'id': 'a6015640-e9e5-3efb-bb60-05589a183919',
- 'ext': 'mp4',
- 'description': 'Galactic',
- 'title': 'Dolla Diva (feat. Maggie Koerner)',
- },
- 'skip': 'redirect to https://www.yahoo.com/music',
- },
- {
- # yahoo://article/
- 'url': 'https://www.yahoo.com/movies/video/true-story-trailer-173000497.html',
- 'info_dict': {
- 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
- 'ext': 'mp4',
- 'title': "'True Story' Trailer",
- 'description': 'True Story',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # ytwnews://cavideo/
- 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
- 'info_dict': {
- 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
- 'ext': 'mp4',
- 'title': '單車天使 - 中文版預',
- 'description': '中文版預',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # custom brightcove
- 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37083565/clown-entertainers-say-it-is-hurting-their-business/',
- 'info_dict': {
- 'id': '5575377707001',
- 'ext': 'mp4',
- 'title': "Clown entertainers say 'It' is hurting their business",
- 'description': 'Stephen King s horror film has much to answer for. Jelby and Mr Loopy the Clowns join us.',
- 'timestamp': 1505341164,
- 'upload_date': '20170913',
- 'uploader_id': '2376984109001',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # custom brightcove, geo-restricted to Australia, bypassable
- 'url': 'https://au.tv.yahoo.com/plus7/sunrise/-/watch/37263964/sunrise-episode-wed-27-sep/',
- 'only_matching': True,
- }
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- page_id = mobj.group('id')
- display_id = mobj.group('display_id') or page_id
- host = mobj.group('host')
- webpage, urlh = self._download_webpage_handle(url, display_id)
- if 'err=404' in urlh.geturl():
- raise ExtractorError('Video gone', expected=True)
-
- # Look for iframed media first
- entries = []
- iframe_urls = re.findall(r'<iframe[^>]+src="(/video/.+?-\d+\.html\?format=embed.*?)"', webpage)
- for idx, iframe_url in enumerate(iframe_urls):
- entries.append(self.url_result(host + iframe_url, 'Yahoo'))
- if entries:
- return self.playlist_result(entries, page_id)
-
- # Look for NBCSports iframes
- nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
- if nbc_sports_url:
- return self.url_result(nbc_sports_url, NBCSportsVPlayerIE.ie_key())
-
- # Look for Brightcove Legacy Studio embeds
- bc_url = BrightcoveLegacyIE._extract_brightcove_url(webpage)
- if bc_url:
- return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
-
- def brightcove_url_result(bc_url):
- return self.url_result(
- smuggle_url(bc_url, {'geo_countries': [mobj.group('country')]}),
- BrightcoveNewIE.ie_key())
-
- # Look for Brightcove New Studio embeds
- bc_url = BrightcoveNewIE._extract_url(self, webpage)
- if bc_url:
- return brightcove_url_result(bc_url)
-
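- # If only a bare Brightcove iframe is left, rebuild the canonical player URL from its accountId/videoId attributes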
- brightcove_iframe = self._search_regex(
- r'(<iframe[^>]+data-video-id=["\']\d+[^>]+>)', webpage,
- 'brightcove iframe', default=None)
- if brightcove_iframe:
- attr = extract_attributes(brightcove_iframe)
- src = attr.get('src')
- if src:
- parsed_src = compat_urlparse.urlparse(src)
- qs = compat_urlparse.parse_qs(parsed_src.query)
- account_id = qs.get('accountId', ['2376984109001'])[0]
- brightcove_id = attr.get('data-video-id') or qs.get('videoId', [None])[0]
- if account_id and brightcove_id:
- return brightcove_url_result(
- 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
- % (account_id, brightcove_id))
-
- # The query result is often embedded in the webpage as JSON. Explicit requests
- # to the video API sometimes fail with a geo restriction reason, so using the
- # embedded query result when present is the safer option.
- config_json = self._search_regex(
- r'window\.Af\.bootstrap\[[^\]]+\]\s*=\s*({.*?"applet_type"\s*:\s*"td-applet-videoplayer".*?});(?:</script>|$)',
- webpage, 'videoplayer applet', default=None)
- if config_json:
- config = self._parse_json(config_json, display_id, fatal=False)
- if config:
- sapi = config.get('models', {}).get('applet_model', {}).get('data', {}).get('sapi')
- if sapi and 'query' in sapi:
- info = self._extract_info(display_id, sapi, webpage)
- self._sort_formats(info['formats'])
- return info
-
- items_json = self._search_regex(
- r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
- default=None)
- if items_json is None:
- alias = self._search_regex(
- r'"aliases":{"video":"(.*?)"', webpage, 'alias', default=None)
- if alias is not None:
- alias_info = self._download_json(
- 'https://www.yahoo.com/_td/api/resource/VideoService.videos;video_aliases=["%s"]' % alias,
- display_id, 'Downloading alias info')
- video_id = alias_info[0]['id']
- else:
- CONTENT_ID_REGEXES = [
- r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
- r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
- r'"first_videoid"\s*:\s*"([^"]+)"',
- r'%s[^}]*"ccm_id"\s*:\s*"([^"]+)"' % re.escape(page_id),
- r'<article[^>]data-uuid=["\']([^"\']+)',
- r'<meta[^<>]+yahoo://article/view\?.*\buuid=([^&"\']+)',
- r'<meta[^<>]+["\']ytwnews://cavideo/(?:[^/]+/)+([\da-fA-F-]+)[&"\']',
- ]
- video_id = self._search_regex(
- CONTENT_ID_REGEXES, webpage, 'content ID')
- else:
- items = json.loads(items_json)
- info = items['mediaItems']['query']['results']['mediaObj'][0]
- # The 'meta' field is not always in the video webpage; we request it
- # from another page
- video_id = info['id']
- return self._get_info(video_id, display_id, webpage)
-
- def _extract_info(self, display_id, query, webpage):
- info = query['query']['results']['mediaObj'][0]
- meta = info.get('meta')
- video_id = info.get('id')
-
- if not meta:
- msg = info['status'].get('msg')
- if msg:
- raise ExtractorError(
- '%s returned error: %s' % (self.IE_NAME, msg), expected=True)
- raise ExtractorError('Unable to extract media object meta')
-
- formats = []
- for s in info['streams']:
- tbr = int_or_none(s.get('bitrate'))
- format_info = {
- 'width': int_or_none(s.get('width')),
- 'height': int_or_none(s.get('height')),
- 'tbr': tbr,
- }
-
- host = s['host']
- path = s['path']
- if host.startswith('rtmp'):
- fmt = 'rtmp'
- format_info.update({
- 'url': host,
- 'play_path': path,
- 'ext': 'flv',
- })
- else:
- if s.get('format') == 'm3u8_playlist':
- fmt = 'hls'
- format_info.update({
- 'protocol': 'm3u8_native',
- 'ext': 'mp4',
- })
- else:
- fmt = format_info['ext'] = determine_ext(path)
- format_url = compat_urlparse.urljoin(host, path)
- format_info['url'] = format_url
- format_info['format_id'] = fmt + ('-%d' % tbr if tbr else '')
- formats.append(format_info)
-
- closed_captions = self._html_search_regex(
- r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
- default='[]')
-
- cc_json = self._parse_json(closed_captions, video_id, fatal=False)
- subtitles = {}
- if cc_json:
- for closed_caption in cc_json:
- lang = closed_caption['lang']
- if lang not in subtitles:
- subtitles[lang] = []
- subtitles[lang].append({
- 'url': closed_caption['url'],
- 'ext': mimetype2ext(closed_caption['content_type']),
- })
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': unescapeHTML(meta['title']),
- 'formats': formats,
- 'description': clean_html(meta['description']),
- 'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
- 'duration': int_or_none(meta.get('duration')),
- 'subtitles': subtitles,
- }
-
- def _get_info(self, video_id, display_id, webpage):
- region = self._search_regex(
- r'\\?"region\\?"\s*:\s*\\?"([^"]+?)\\?"',
- webpage, 'region', fatal=False, default='US').upper()
- formats = []
- info = {}
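- # Request both webm and mp4 renditions from the streams API, then top them up with the HLS manifest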
- for fmt in ('webm', 'mp4'):
- query_result = self._download_json(
- 'https://video.media.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
- display_id, 'Downloading %s video info' % fmt, query={
- 'protocol': 'http',
- 'region': region,
- 'format': fmt,
- })
- info = self._extract_info(display_id, query_result, webpage)
- formats.extend(info['formats'])
- formats.extend(self._extract_m3u8_formats(
- 'http://video.media.yql.yahoo.com/v1/hls/%s?region=%s' % (video_id, region),
- video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
- self._sort_formats(formats)
- info['formats'] = formats
- return info
-
-
-class YahooSearchIE(SearchInfoExtractor):
- IE_DESC = 'Yahoo screen search'
- _MAX_RESULTS = 1000
- IE_NAME = 'screen.yahoo:search'
- _SEARCH_KEY = 'yvsearch'
-
- def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
- entries = []
- for pagenum in itertools.count(0):
- result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
- info = self._download_json(result_url, query,
- note='Downloading results page ' + str(pagenum + 1))
- m = info['m']
- results = info['results']
-
- for (i, r) in enumerate(results):
- if (pagenum * 30) + i >= n:
- break
- mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
- if not mobj:
- continue
- e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
- entries.append(e)
- if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
- break
-
- return {
- '_type': 'playlist',
- 'id': query,
- 'entries': entries,
- }
-
-
-class YahooGyaOPlayerIE(InfoExtractor):
- IE_NAME = 'yahoo:gyao:player'
- _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode/[^/]+)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
- _TESTS = [{
- 'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
- 'info_dict': {
- 'id': '5993125228001',
- 'ext': 'mp4',
- 'title': 'フューリー 【字幕版】',
- 'description': 'md5:21e691c798a15330eda4db17a8fe45a5',
- 'uploader_id': '4235717419001',
- 'upload_date': '20190124',
- 'timestamp': 1548294365,
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- }, {
- 'url': 'https://streaming.yahoo.co.jp/c/y/01034/v00133/v0000000000000000706/',
- 'only_matching': True,
- }, {
- 'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
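- # API IDs use ':' as separator where the URL path uses '/'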
- video_id = self._match_id(url).replace('/', ':')
- video = self._download_json(
- 'https://gyao.yahoo.co.jp/dam/v1/videos/' + video_id,
- video_id, query={
- 'fields': 'longDescription,title,videoId',
- }, headers={
- 'X-User-Agent': 'Unknown Pc GYAO!/2.0.0 Web',
- })
- return {
- '_type': 'url_transparent',
- 'id': video_id,
- 'title': video['title'],
- 'url': smuggle_url(
- 'http://players.brightcove.net/4235717419001/default_default/index.html?videoId=' + video['videoId'],
- {'geo_countries': ['JP']}),
- 'description': video.get('longDescription'),
- 'ie_key': BrightcoveNewIE.ie_key(),
- }
-
-
-class YahooGyaOIE(InfoExtractor):
- IE_NAME = 'yahoo:gyao'
- _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title/[^/]+)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
- _TESTS = [{
- 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
- 'info_dict': {
- 'id': '00449:v03102',
- },
- 'playlist_count': 2,
- }, {
- 'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
- 'only_matching': True,
- }, {
- 'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- program_id = self._match_id(url).replace('/', ':')
- videos = self._download_json(
- 'https://gyao.yahoo.co.jp/api/programs/%s/videos' % program_id, program_id)['videos']
- entries = []
- for video in videos:
- video_id = video.get('id')
- if not video_id:
- continue
- entries.append(self.url_result(
- 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'),
- YahooGyaOPlayerIE.ie_key(), video_id))
- return self.playlist_result(entries, program_id)
-
-
-class YahooJapanNewsIE(InfoExtractor):
- IE_NAME = 'yahoo:japannews'
- IE_DESC = 'Yahoo! Japan News'
- _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?'
- _GEO_COUNTRIES = ['JP']
- _TESTS = [{
- 'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int',
- 'info_dict': {
- 'id': '1736242',
- 'ext': 'mp4',
- 'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース',
- 'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))',
- 'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$',
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- # geo restricted
- 'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04',
- 'only_matching': True,
- }, {
- 'url': 'https://headlines.yahoo.co.jp/videonews/',
- 'only_matching': True,
- }, {
- 'url': 'https://news.yahoo.co.jp',
- 'only_matching': True,
- }, {
- 'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/',
- 'only_matching': True,
- }, {
- 'url': 'https://news.yahoo.co.jp/feature/1356',
- 'only_matching': True
- }]
-
- def _extract_formats(self, json_data, content_id):
- formats = []
-
- video_data = try_get(
- json_data,
- lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
- list)
- for vid in video_data or []:
- delivery = vid.get('delivery')
- url = url_or_none(vid.get('Url'))
- if not delivery or not url:
- continue
- if delivery == 'hls':
- formats.extend(
- self._extract_m3u8_formats(
- url, content_id, 'mp4', 'm3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- formats.append({
- 'url': url,
- 'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')),
- 'height': int_or_none(vid.get('height')),
- 'width': int_or_none(vid.get('width')),
- 'tbr': int_or_none(vid.get('bitrate')),
- })
- self._remove_duplicate_formats(formats)
- self._sort_formats(formats)
-
- return formats
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- host = mobj.group('host')
- display_id = mobj.group('id') or host
-
- webpage = self._download_webpage(url, display_id)
-
- title = self._html_search_meta(
- ['og:title', 'twitter:title'], webpage, 'title', default=None
- ) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title')
-
- if display_id == host:
- # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...)
- stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage)
- entries = [
- self.url_result(
- smuggle_url(
- 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id,
- {'geo_countries': ['JP']}),
- ie='BrightcoveNew', video_id=plist_id)
- for plist_id in stream_plists]
- return self.playlist_result(entries, playlist_title=title)
-
- # Article page
- description = self._html_search_meta(
- ['og:description', 'description', 'twitter:description'],
- webpage, 'description', default=None)
- thumbnail = self._og_search_thumbnail(
- webpage, default=None) or self._html_search_meta(
- 'twitter:image', webpage, 'thumbnail', default=None)
- space_id = self._search_regex([
- r'<script[^>]+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)',
- r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)',
- r'<!--\s+SpaceID=(\d+)'
- ], webpage, 'spaceid')
-
- content_id = self._search_regex(
- r'<script[^>]+class=["\']yvpub-player["\'][^>]+contentid=(?P<contentid>[^&"\']+)',
- webpage, 'contentid', group='contentid')
-
- json_data = self._download_json(
- 'https://feapi-yvpub.yahooapis.jp/v1/content/%s' % content_id,
- content_id,
- query={
- 'appid': 'dj0zaiZpPVZMTVFJR0FwZWpiMyZzPWNvbnN1bWVyc2VjcmV0Jng9YjU-',
- 'output': 'json',
- 'space_id': space_id,
- 'domain': host,
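- # 'ak' is the MD5 digest of '<space_id>_<host>'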
- 'ak': hashlib.md5('_'.join((space_id, host)).encode()).hexdigest(),
- 'device_type': '1100',
- })
- formats = self._extract_formats(json_data, content_id)
-
- return {
- 'id': content_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/yandexmusic.py b/youtube_dl/extractor/yandexmusic.py
deleted file mode 100644
index 08d35e04c..000000000
--- a/youtube_dl/extractor/yandexmusic.py
+++ /dev/null
@@ -1,313 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-import hashlib
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- ExtractorError,
- int_or_none,
- float_or_none,
- try_get,
-)
-
-
-class YandexMusicBaseIE(InfoExtractor):
- @staticmethod
- def _handle_error(response):
- if isinstance(response, dict):
- error = response.get('error')
- if error:
- raise ExtractorError(error, expected=True)
- if response.get('type') == 'captcha' or 'captcha' in response:
- YandexMusicBaseIE._raise_captcha()
-
- @staticmethod
- def _raise_captcha():
- raise ExtractorError(
- 'YandexMusic has considered youtube-dl requests automated and '
- 'asks you to solve a CAPTCHA. You can either wait until you are '
- 'unblocked (optionally using --sleep-interval in the future) or '
- 'go to https://music.yandex.ru/, solve the CAPTCHA, then export '
- 'your cookies and pass the cookie file to youtube-dl with '
- '--cookies',
- expected=True)
-
- def _download_webpage(self, *args, **kwargs):
- webpage = super(YandexMusicBaseIE, self)._download_webpage(*args, **kwargs)
- if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
- self._raise_captcha()
- return webpage
-
- def _download_json(self, *args, **kwargs):
- response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
- self._handle_error(response)
- return response
-
-
-class YandexMusicTrackIE(YandexMusicBaseIE):
- IE_NAME = 'yandexmusic:track'
- IE_DESC = 'Яндекс.Музыка - Трек'
- _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://music.yandex.ru/album/540508/track/4878838',
- 'md5': 'f496818aa2f60b6c0062980d2e00dc20',
- 'info_dict': {
- 'id': '4878838',
- 'ext': 'mp3',
- 'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
- 'filesize': 4628061,
- 'duration': 193.04,
- 'track': 'Gypsy Eyes 1',
- 'album': 'Gypsy Soul',
- 'album_artist': 'Carlo Ambrosio',
- 'artist': 'Carlo Ambrosio & Fabio Di Bari',
- 'release_year': 2009,
- },
- 'skip': 'Travis CI servers blocked by YandexMusic',
- }, {
- # multiple discs
- 'url': 'http://music.yandex.ru/album/3840501/track/705105',
- 'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e',
- 'info_dict': {
- 'id': '705105',
- 'ext': 'mp3',
- 'title': 'Hooverphonic - Sometimes',
- 'filesize': 5743386,
- 'duration': 239.27,
- 'track': 'Sometimes',
- 'album': 'The Best of Hooverphonic',
- 'album_artist': 'Hooverphonic',
- 'artist': 'Hooverphonic',
- 'release_year': 2016,
- 'genre': 'pop',
- 'disc_number': 2,
- 'track_number': 9,
- },
- 'skip': 'Travis CI servers blocked by YandexMusic',
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- album_id, track_id = mobj.group('album_id'), mobj.group('id')
-
- track = self._download_json(
- 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
- track_id, 'Downloading track JSON')['track']
- track_title = track['title']
-
- download_data = self._download_json(
- 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
- track_id, 'Downloading track location url JSON',
- headers={'X-Retpath-Y': url})
-
- fd_data = self._download_json(
- download_data['src'], track_id,
- 'Downloading track location JSON',
- query={'format': 'json'})
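- # The direct MP3 URL is signed with an MD5 over a hard-coded salt, the storage path and the 's' token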
- key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
- storage = track['storageDir'].split('.')
- f_url = 'http://%s/get-mp3/%s/%s?track-id=%s' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], storage[1])
-
- thumbnail = None
- cover_uri = track.get('albums', [{}])[0].get('coverUri')
- if cover_uri:
- thumbnail = cover_uri.replace('%%', 'orig')
- if not thumbnail.startswith('http'):
- thumbnail = 'http://' + thumbnail
-
- track_info = {
- 'id': track_id,
- 'ext': 'mp3',
- 'url': f_url,
- 'filesize': int_or_none(track.get('fileSize')),
- 'duration': float_or_none(track.get('durationMs'), 1000),
- 'thumbnail': thumbnail,
- 'track': track_title,
- 'acodec': download_data.get('codec'),
- 'abr': int_or_none(download_data.get('bitrate')),
- }
-
- def extract_artist_name(artist):
- decomposed = artist.get('decomposed')
- if not isinstance(decomposed, list):
- return artist['name']
- parts = [artist['name']]
- for element in decomposed:
- if isinstance(element, dict) and element.get('name'):
- parts.append(element['name'])
- elif isinstance(element, compat_str):
- parts.append(element)
- return ''.join(parts)
-
- def extract_artist(artist_list):
- if artist_list and isinstance(artist_list, list):
- artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
- if artists_names:
- return ', '.join(artists_names)
-
- albums = track.get('albums')
- if albums and isinstance(albums, list):
- album = albums[0]
- if isinstance(album, dict):
- year = album.get('year')
- disc_number = int_or_none(try_get(
- album, lambda x: x['trackPosition']['volume']))
- track_number = int_or_none(try_get(
- album, lambda x: x['trackPosition']['index']))
- track_info.update({
- 'album': album.get('title'),
- 'album_artist': extract_artist(album.get('artists')),
- 'release_year': int_or_none(year),
- 'genre': album.get('genre'),
- 'disc_number': disc_number,
- 'track_number': track_number,
- })
-
- track_artist = extract_artist(track.get('artists'))
- if track_artist:
- track_info.update({
- 'artist': track_artist,
- 'title': '%s - %s' % (track_artist, track_title),
- })
- else:
- track_info['title'] = track_title
-
- return track_info
-
-
-class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
- def _build_playlist(self, tracks):
- return [
- self.url_result(
- 'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id']))
- for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)]
-
-
-class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
- IE_NAME = 'yandexmusic:album'
- IE_DESC = 'Яндекс.Музыка - Альбом'
- _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
-
- _TESTS = [{
- 'url': 'http://music.yandex.ru/album/540508',
- 'info_dict': {
- 'id': '540508',
- 'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
- },
- 'playlist_count': 50,
- 'skip': 'Travis CI servers blocked by YandexMusic',
- }, {
- 'url': 'https://music.yandex.ru/album/3840501',
- 'info_dict': {
- 'id': '3840501',
- 'title': 'Hooverphonic - The Best of Hooverphonic (2016)',
- },
- 'playlist_count': 33,
- 'skip': 'Travis CI servers blocked by YandexMusic',
- }]
-
- def _real_extract(self, url):
- album_id = self._match_id(url)
-
- album = self._download_json(
- 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
- album_id, 'Downloading album JSON')
-
- entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
-
- title = '%s - %s' % (album['artists'][0]['name'], album['title'])
- year = album.get('year')
- if year:
- title += ' (%s)' % year
-
- return self.playlist_result(entries, compat_str(album['id']), title)
-
-
-class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
- IE_NAME = 'yandexmusic:playlist'
- IE_DESC = 'Яндекс.Музыка - Плейлист'
- _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'
-
- _TESTS = [{
- 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
- 'info_dict': {
- 'id': '1245',
- 'title': 'Что слушают Enter Shikari',
- 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
- },
- 'playlist_count': 6,
- 'skip': 'Travis CI servers blocked by YandexMusic',
- }, {
- # playlist exceeding the 150-track limit shipped with the webpage (see
- # https://github.com/ytdl-org/youtube-dl/issues/6666)
- 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
- 'info_dict': {
- 'id': '1036',
- 'title': 'Музыка 90-х',
- },
- 'playlist_mincount': 300,
- 'skip': 'Travis CI servers blocked by YandexMusic',
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- tld = mobj.group('tld')
- user = mobj.group('user')
- playlist_id = mobj.group('id')
-
- playlist = self._download_json(
- 'https://music.yandex.%s/handlers/playlist.jsx' % tld,
- playlist_id, 'Downloading playlist JSON',
- fatal=False,
- headers={
- 'Referer': url,
- 'X-Requested-With': 'XMLHttpRequest',
- 'X-Retpath-Y': url,
- },
- query={
- 'owner': user,
- 'kinds': playlist_id,
- 'light': 'true',
- 'lang': tld,
- 'external-domain': 'music.yandex.%s' % tld,
- 'overembed': 'false',
- })['playlist']
-
- tracks = playlist['tracks']
- track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]
-
- # The tracks list shipped with the playlist.jsx API is limited to 150
- # entries; the remaining tracks have to be fetched separately.
- if len(tracks) < len(track_ids):
- present_track_ids = set([
- compat_str(track['id'])
- for track in tracks if track.get('id')])
- missing_track_ids = [
- track_id for track_id in track_ids
- if track_id not in present_track_ids]
- missing_tracks = self._download_json(
- 'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
- playlist_id, 'Downloading missing tracks JSON',
- fatal=False,
- headers={
- 'Referer': url,
- 'X-Requested-With': 'XMLHttpRequest',
- },
- query={
- 'entries': ','.join(missing_track_ids),
- 'lang': tld,
- 'external-domain': 'music.yandex.%s' % tld,
- 'overembed': 'false',
- 'strict': 'true',
- })
- if missing_tracks:
- tracks.extend(missing_tracks)
-
- return self.playlist_result(
- self._build_playlist(tracks),
- compat_str(playlist_id),
- playlist.get('title'), playlist.get('description'))
diff --git a/youtube_dl/extractor/youjizz.py b/youtube_dl/extractor/youjizz.py
deleted file mode 100644
index dff69fcb7..000000000
--- a/youtube_dl/extractor/youjizz.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- determine_ext,
- int_or_none,
- parse_duration,
- url_or_none,
-)
-
-
-class YouJizzIE(InfoExtractor):
- _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))'
- _TESTS = [{
- 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
- 'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4',
- 'info_dict': {
- 'id': '2189178',
- 'ext': 'mp4',
- 'title': 'Zeichentrick 1',
- 'age_limit': 18,
- 'duration': 2874,
- }
- }, {
- 'url': 'http://www.youjizz.com/videos/-2189178.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youjizz.com/videos/embed/31991001',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id') or mobj.group('embed_id')
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._html_search_regex(
- r'<title>(.+?)</title>', webpage, 'title')
-
- formats = []
-
- encodings = self._parse_json(
- self._search_regex(
- r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
- default='[]'),
- video_id, fatal=False)
- for encoding in encodings or []:  # _parse_json may return None on malformed data
- if not isinstance(encoding, dict):
- continue
- format_url = url_or_none(encoding.get('filename'))
- if not format_url:
- continue
- if determine_ext(format_url) == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls', fatal=False))
- else:
- format_id = encoding.get('name') or encoding.get('quality')
- height = int_or_none(self._search_regex(
- r'^(\d+)[pP]', format_id, 'height', default=None))
- formats.append({
- 'url': format_url,
- 'format_id': format_id,
- 'height': height,
- })
-
- if formats:
- info_dict = {
- 'formats': formats,
- }
- else:
- # YouJizz's HTML5 player has invalid HTML
- webpage = webpage.replace('"controls', '" controls')
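- # (the markup lacks a space before the 'controls' attribute, which
- # would otherwise break attribute parsing in _parse_html5_media_entries)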
- info_dict = self._parse_html5_media_entries(
- url, webpage, video_id)[0]
-
- duration = parse_duration(self._search_regex(
- r'<strong>Runtime:</strong>([^<]+)', webpage, 'duration',
- default=None))
- uploader = self._search_regex(
- r'<strong>Uploaded By:.*?<a[^>]*>([^<]+)', webpage, 'uploader',
- default=None)
-
- info_dict.update({
- 'id': video_id,
- 'title': title,
- 'age_limit': self._rta_search(webpage),
- 'duration': duration,
- 'uploader': uploader,
- })
-
- return info_dict
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
deleted file mode 100644
index d4eccb4b2..000000000
--- a/youtube_dl/extractor/youporn.py
+++ /dev/null
@@ -1,192 +0,0 @@
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- int_or_none,
- sanitized_Request,
- str_to_int,
- unescapeHTML,
- unified_strdate,
- url_or_none,
-)
-from ..aes import aes_decrypt_text
-
-
-class YouPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?youporn\.com/watch/(?P<id>\d+)/(?P<display_id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
- 'md5': '3744d24c50438cf5b6f6d59feb5055c2',
- 'info_dict': {
- 'id': '505835',
- 'display_id': 'sex-ed-is-it-safe-to-masturbate-daily',
- 'ext': 'mp4',
- 'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
- 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Ask Dan And Jennifer',
- 'upload_date': '20101217',
- 'average_rating': int,
- 'view_count': int,
- 'comment_count': int,
- 'categories': list,
- 'tags': list,
- 'age_limit': 18,
- },
- }, {
- # Unknown uploader
- 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
- 'info_dict': {
- 'id': '561726',
- 'display_id': 'big-tits-awesome-brunette-on-amazing-webcam-show',
- 'ext': 'mp4',
- 'title': 'Big Tits Awesome Brunette On amazing webcam show',
- 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'uploader': 'Unknown',
- 'upload_date': '20110418',
- 'average_rating': int,
- 'view_count': int,
- 'comment_count': int,
- 'categories': list,
- 'tags': list,
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- },
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id')
-
- request = sanitized_Request(url)
- request.add_header('Cookie', 'age_verified=1')
- webpage = self._download_webpage(request, display_id)
-
- title = self._html_search_regex(
- r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
- webpage, 'title', default=None) or self._og_search_title(
- webpage, default=None) or self._html_search_meta(
- 'title', webpage, fatal=True)
-
- links = []
-
- # Main source
- definitions = self._parse_json(
- self._search_regex(
- r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
- 'media definitions', default='[]'),
- video_id, fatal=False)
- if definitions:
- for definition in definitions:
- if not isinstance(definition, dict):
- continue
- video_url = url_or_none(definition.get('videoUrl'))
- if video_url:
- links.append(video_url)
-
- # Fallback #1, this also contains extra low quality 180p format
- for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
- links.append(link)
-
- # Fallback #2 (unavailable as at 22.06.2017)
- sources = self._search_regex(
- r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
- if sources:
- for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
- links.append(link)
-
- # Fallback #3 (unavailable as at 22.06.2017)
- for _, link in re.findall(
- r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
- links.append(link)
-
- # Fallback #4, encrypted links (unavailable as at 22.06.2017)
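- # (aes_decrypt_text derives the AES key from the video title; the 32
- # selects a 256-bit key)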
- for _, encrypted_link in re.findall(
- r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
- links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
-
- formats = []
- for video_url in set(unescapeHTML(link) for link in links):
- f = {
- 'url': video_url,
- }
- # Video URL's path looks like this:
- # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
- # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
- # We can extract some useful metadata (height, bitrate) from it
- mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
- if mobj:
- height = int(mobj.group('height'))
- bitrate = int(mobj.group('bitrate'))
- f.update({
- 'format_id': '%dp-%dk' % (height, bitrate),
- 'height': height,
- 'tbr': bitrate,
- })
- formats.append(f)
- self._sort_formats(formats)
-
- description = self._html_search_regex(
- r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
- webpage, 'description',
- default=None) or self._og_search_description(
- webpage, default=None)
- thumbnail = self._search_regex(
- r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
- webpage, 'thumbnail', fatal=False, group='thumbnail')
-
- uploader = self._html_search_regex(
- r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
- webpage, 'uploader', fatal=False)
- upload_date = unified_strdate(self._html_search_regex(
- [r'Date\s+[Aa]dded:\s*<span>([^<]+)',
- r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
- webpage, 'upload date', fatal=False))
-
- age_limit = self._rta_search(webpage)
-
- average_rating = int_or_none(self._search_regex(
- r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>',
- webpage, 'average rating', fatal=False))
-
- view_count = str_to_int(self._search_regex(
- r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<',
- webpage, 'view count', fatal=False, group='count'))
- comment_count = str_to_int(self._search_regex(
- r'>All [Cc]omments? \(([\d,.]+)\)',
- webpage, 'comment count', fatal=False))
-
- def extract_tag_box(regex, title):
- tag_box = self._search_regex(regex, webpage, title, default=None)
- if not tag_box:
- return []
- return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
-
- categories = extract_tag_box(
- r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories')
- tags = extract_tag_box(
- r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
- 'tags')
-
- return {
- 'id': video_id,
- 'display_id': display_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'uploader': uploader,
- 'upload_date': upload_date,
- 'average_rating': average_rating,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'categories': categories,
- 'tags': tags,
- 'age_limit': age_limit,
- 'formats': formats,
- }
diff --git a/youtube_dl/extractor/yourporn.py b/youtube_dl/extractor/yourporn.py
deleted file mode 100644
index 8a2d5f63b..000000000
--- a/youtube_dl/extractor/yourporn.py
+++ /dev/null
@@ -1,57 +0,0 @@
-from __future__ import unicode_literals
-
-from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- urljoin,
-)
-
-
-class YourPornIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?(?:yourporn\.sexy|sxyprn\.com)/post/(?P<id>[^/?#&.]+)'
- _TESTS = [{
- 'url': 'https://yourporn.sexy/post/57ffcb2e1179b.html',
- 'md5': '6f8682b6464033d87acaa7a8ff0c092e',
- 'info_dict': {
- 'id': '57ffcb2e1179b',
- 'ext': 'mp4',
- 'title': 'md5:c9f43630bd968267672651ba905a7d35',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 165,
- 'age_limit': 18,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
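- # The per-post URL map is stored in a data-vnfo attribute keyed by
- # video ID; the '/cdn/' host is swapped for '/cdn5/', which appears to
- # serve the same files more reliably.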
- video_url = urljoin(url, self._parse_json(
- self._search_regex(
- r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
- group='data'),
- video_id)[video_id]).replace('/cdn/', '/cdn5/')
-
- title = (self._search_regex(
- r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
- default=None) or self._og_search_description(webpage)).strip()
- thumbnail = self._og_search_thumbnail(webpage)
- duration = parse_duration(self._search_regex(
- r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
- default=None))
-
- return {
- 'id': video_id,
- 'url': video_url,
- 'title': title,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'age_limit': 18,
- }
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
deleted file mode 100644
index 5e397324b..000000000
--- a/youtube_dl/extractor/youtube.py
+++ /dev/null
@@ -1,3327 +0,0 @@
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-
-import itertools
-import json
-import os.path
-import random
-import re
-import time
-import traceback
-
-from .common import InfoExtractor, SearchInfoExtractor
-from ..jsinterp import JSInterpreter
-from ..swfinterp import SWFInterpreter
-from ..compat import (
- compat_chr,
- compat_HTTPError,
- compat_kwargs,
- compat_parse_qs,
- compat_urllib_parse_unquote,
- compat_urllib_parse_unquote_plus,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urlparse,
- compat_str,
-)
-from ..utils import (
- bool_or_none,
- clean_html,
- dict_get,
- error_to_compat_str,
- extract_attributes,
- ExtractorError,
- float_or_none,
- get_element_by_attribute,
- get_element_by_id,
- int_or_none,
- mimetype2ext,
- orderedSet,
- parse_codecs,
- parse_duration,
- remove_quotes,
- remove_start,
- smuggle_url,
- str_or_none,
- str_to_int,
- try_get,
- unescapeHTML,
- unified_strdate,
- unsmuggle_url,
- uppercase_escape,
- url_or_none,
- urlencode_postdata,
-)
-
-
-class YoutubeBaseInfoExtractor(InfoExtractor):
- """Provide base functions for Youtube extractors"""
- _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
- _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
-
- _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
- _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
- _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
-
- _NETRC_MACHINE = 'youtube'
- # If True, an error is raised when no login info is provided
- _LOGIN_REQUIRED = False
-
- _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)[0-9A-Za-z-_]{10,}'
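- # Known prefixes include PL (regular playlists), UU (channel uploads),
- # LL (liked videos), FL (favourites), RD (mixes/radio) and OLAK5uy_
- # (auto-generated album playlists).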
-
- def _set_language(self):
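- # Pin the interface language to English via the PREF cookie so that
- # scraped strings match the regexes used throughout this module.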
- self._set_cookie(
- '.youtube.com', 'PREF', 'f1=50000000&hl=en',
- # YouTube sets the expire time to about two months
- expire_time=time.time() + 2 * 30 * 24 * 3600)
-
- def _ids_to_results(self, ids):
- return [
- self.url_result(vid_id, 'Youtube', video_id=vid_id)
- for vid_id in ids]
-
- def _login(self):
- """
- Attempt to log in to YouTube.
- Returns True if login succeeded or was skipped, False if it failed.
-
- If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
- """
- username, password = self._get_login_info()
- # No authentication to be performed
- if username is None:
- if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
- raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
- return True
-
- login_page = self._download_webpage(
- self._LOGIN_URL, None,
- note='Downloading login page',
- errnote='unable to fetch login page', fatal=False)
- if login_page is False:
- return
-
- login_form = self._hidden_inputs(login_page)
-
- def req(url, f_req, note, errnote):
- data = login_form.copy()
- data.update({
- 'pstMsg': 1,
- 'checkConnection': 'youtube',
- 'checkedDomains': 'youtube',
- 'hl': 'en',
- 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
- 'f.req': json.dumps(f_req),
- 'flowName': 'GlifWebSignIn',
- 'flowEntry': 'ServiceLogin',
- # TODO: reverse actual botguard identifier generation algo
- 'bgRequest': '["identifier",""]',
- })
- return self._download_json(
- url, None, note=note, errnote=errnote,
- transform_source=lambda s: re.sub(r'^[^[]*', '', s),
- fatal=False,
- data=urlencode_postdata(data), headers={
- 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
- 'Google-Accounts-XSRF': 1,
- })
-
- def warn(message):
- self._downloader.report_warning(message)
-
- lookup_req = [
- username,
- None, [], None, 'US', None, None, 2, False, True,
- [
- None, None,
- [2, 1, None, 1,
- 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
- None, [], 4],
- 1, [None, None, []], None, None, None, True
- ],
- username,
- ]
-
- lookup_results = req(
- self._LOOKUP_URL, lookup_req,
- 'Looking up account info', 'Unable to look up account info')
-
- if lookup_results is False:
- return False
-
- user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
- if not user_hash:
- warn('Unable to extract user hash')
- return False
-
- challenge_req = [
- user_hash,
- None, 1, None, [1, None, None, None, [password, None, True]],
- [
- None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
- 1, [None, None, []], None, None, None, True
- ]]
-
- challenge_results = req(
- self._CHALLENGE_URL, challenge_req,
- 'Logging in', 'Unable to log in')
-
- if challenge_results is False:
- return
-
- login_res = try_get(challenge_results, lambda x: x[0][5], list)
- if login_res:
- login_msg = try_get(login_res, lambda x: x[5], compat_str)
- warn(
- 'Unable to login: %s' % ('Invalid password'
- if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg))
- return False
-
- res = try_get(challenge_results, lambda x: x[0][-1], list)
- if not res:
- warn('Unable to extract result entry')
- return False
-
- login_challenge = try_get(res, lambda x: x[0][0], list)
- if login_challenge:
- challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
- if challenge_str == 'TWO_STEP_VERIFICATION':
- # SEND_SUCCESS - TFA code has been successfully sent to phone
- # QUOTA_EXCEEDED - reached the limit of TFA codes
- status = try_get(login_challenge, lambda x: x[5], compat_str)
- if status == 'QUOTA_EXCEEDED':
- warn('Exceeded the limit of TFA codes, try later')
- return False
-
- tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
- if not tl:
- warn('Unable to extract TL')
- return False
-
- tfa_code = self._get_tfa_info('2-step verification code')
-
- if not tfa_code:
- warn(
- 'Two-factor authentication required. Provide it either interactively or with --twofactor <code> '
- '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
- return False
-
- tfa_code = remove_start(tfa_code, 'G-')
-
- tfa_req = [
- user_hash, None, 2, None,
- [
- 9, None, None, None, None, None, None, None,
- [None, tfa_code, True, 2]
- ]]
-
- tfa_results = req(
- self._TFA_URL.format(tl), tfa_req,
- 'Submitting TFA code', 'Unable to submit TFA code')
-
- if tfa_results is False:
- return False
-
- tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
- if tfa_res:
- tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
- warn(
- 'Unable to finish TFA: %s' % ('Invalid TFA code'
- if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg))
- return False
-
- check_cookie_url = try_get(
- tfa_results, lambda x: x[0][-1][2], compat_str)
- else:
- CHALLENGES = {
- 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
- 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
- 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
- }
- challenge = CHALLENGES.get(
- challenge_str,
- '%s returned error %s.' % (self.IE_NAME, challenge_str))
- warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
- return False
- else:
- check_cookie_url = try_get(res, lambda x: x[2], compat_str)
-
- if not check_cookie_url:
- warn('Unable to extract CheckCookie URL')
- return False
-
- check_cookie_results = self._download_webpage(
- check_cookie_url, None, 'Checking cookie', fatal=False)
-
- if check_cookie_results is False:
- return False
-
- if 'https://myaccount.google.com/' not in check_cookie_results:
- warn('Unable to log in')
- return False
-
- return True
-
- def _download_webpage_handle(self, *args, **kwargs):
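- # Request the legacy (non-Polymer) YouTube layout on every page; the
- # HTML scraping in this module is written against the old markup.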
- query = kwargs.get('query', {}).copy()
- query['disable_polymer'] = 'true'
- kwargs['query'] = query
- return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
- *args, **compat_kwargs(kwargs))
-
- def _real_initialize(self):
- if self._downloader is None:
- return
- self._set_language()
- if not self._login():
- return
-
-
-class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
- # Extract entries from page with "Load more" button
- def _entries(self, page, playlist_id):
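- # Each "Load more" AJAX response carries 'content_html' (the new
- # entries) and 'load_more_widget_html' (markup holding the next URL).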
- more_widget_html = content_html = page
- for page_num in itertools.count(1):
- for entry in self._process_page(content_html):
- yield entry
-
- mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
- if not mobj:
- break
-
- count = 0
- retries = 3
- while count <= retries:
- try:
- # Downloading a page may fail with an intermittent 5xx HTTP
- # error that is usually worked around by retrying
- more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
- 'Downloading page #%s%s'
- % (page_num, ' (retry #%d)' % count if count else ''),
- transform_source=uppercase_escape)
- break
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
- count += 1
- if count <= retries:
- continue
- raise
-
- content_html = more['content_html']
- if not content_html.strip():
- # Some webpages show a "Load more" button but they don't
- # have more videos
- break
- more_widget_html = more['load_more_widget_html']
-
-
-class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
- def _process_page(self, content):
- for video_id, video_title in self.extract_videos_from_page(content):
- yield self.url_result(video_id, 'Youtube', video_id, video_title)
-
- def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
- for mobj in re.finditer(video_re, page):
- # The link with index 0 is not the first video of the playlist (not sure if this is still the case)
- if 'index' in mobj.groupdict() and mobj.group('id') == '0':
- continue
- video_id = mobj.group('id')
- video_title = unescapeHTML(
- mobj.group('title')) if 'title' in mobj.groupdict() else None
- if video_title:
- video_title = video_title.strip()
- if video_title == '► Play all':
- video_title = None
- try:
- idx = ids_in_page.index(video_id)
- if video_title and not titles_in_page[idx]:
- titles_in_page[idx] = video_title
- except ValueError:
- ids_in_page.append(video_id)
- titles_in_page.append(video_title)
-
- def extract_videos_from_page(self, page):
- ids_in_page = []
- titles_in_page = []
- self.extract_videos_from_page_impl(
- self._VIDEO_RE, page, ids_in_page, titles_in_page)
- return zip(ids_in_page, titles_in_page)
-
-
-class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
- def _process_page(self, content):
- for playlist_id in orderedSet(re.findall(
- r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
- content)):
- yield self.url_result(
- 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- webpage = self._download_webpage(url, playlist_id)
- title = self._og_search_title(webpage, fatal=False)
- return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
-
-
-class YoutubeIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com'
- _VALID_URL = r"""(?x)^
- (
- (?:https?://|//) # http(s):// or protocol-independent URL
- (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/|
- (?:www\.)?deturl\.com/www\.youtube\.com/|
- (?:www\.)?pwnyoutube\.com/|
- (?:www\.)?hooktube\.com/|
- (?:www\.)?yourepeat\.com/|
- tube\.majestyc\.net/|
- # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
- (?:(?:www|dev)\.)?invidio\.us/|
- (?:(?:www|no)\.)?invidiou\.sh/|
- (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
- (?:www\.)?invidious\.kabi\.tk/|
- (?:www\.)?invidious\.13ad\.de/|
- (?:www\.)?invidious\.mastodon\.host/|
- (?:www\.)?invidious\.nixnet\.xyz/|
- (?:www\.)?invidious\.drycat\.fr/|
- (?:www\.)?tube\.poal\.co/|
- (?:www\.)?vid\.wxzm\.sx/|
- (?:www\.)?yt\.elukerio\.org/|
- (?:www\.)?yt\.lelux\.fi/|
- (?:www\.)?kgg2m7yk5aybusll\.onion/|
- (?:www\.)?qklhadlycap4cnod\.onion/|
- (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
- (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
- (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
- (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
- (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
- youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
- (?:.*?\#/)? # handle anchor (#/) redirect urls
- (?: # the various things that can precede the ID:
- (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
- |(?: # or the v= param in all its forms
- (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
- (?:\?|\#!?) # the params delimiter ? or # or #!
- (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
- v=
- )
- ))
- |(?:
- youtu\.be| # just youtu.be/xxxx
- vid\.plus| # or vid.plus/xxxx
- zwearz\.com/watch| # or zwearz.com/watch/xxxx
- )/
- |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
- )
- )? # all until now is optional -> you can pass the naked ID
- ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
- (?!.*?\blist=
- (?:
- %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
- WL # WL are handled by the watch later IE
- )
- )
- (?(1).+)? # if we found the ID, everything can follow
- $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
- _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
- _formats = {
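- # Keyed by YouTube itag (the numeric format identifier); these entries
- # supply metadata that the raw stream maps often omit.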
- '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
- '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
- '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
- '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
- '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
- '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
- '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
- '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
- '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
- '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
- '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
-
- # 3D videos
- '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
- '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
- '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
- '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
- '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
-
- # Apple HTTP Live Streaming
- '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
- '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
- '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
- '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
-
- # DASH mp4 video
- '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
- '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
- '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
- '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
-
- # Dash mp4 audio
- '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
- '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
- '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
- '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
- '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
- '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
-
- # Dash webm
- '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
- '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
- '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
- '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
- '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
- '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
-
- # Dash webm audio
- '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
- '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
-
- # Dash webm audio with opus inside
- '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
- '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
- '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
-
- # RTMP (unnamed)
- '_rtmp': {'protocol': 'rtmp'},
-
- # av01 video only formats sometimes served with "unknown" codecs
- '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
- }
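- # srv1-srv3 are YouTube's own timedtext XML variants; ttml and vtt are
- # standard caption formats.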
- _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
-
- _GEO_BYPASS = False
-
- IE_NAME = 'youtube'
- _TESTS = [
- {
- 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
- 'info_dict': {
- 'id': 'BaW_jenozKc',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
- 'uploader': 'Philipp Hagemeister',
- 'uploader_id': 'phihag',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
- 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
- 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
- 'upload_date': '20121002',
- 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
- 'categories': ['Science & Technology'],
- 'tags': ['youtube-dl'],
- 'duration': 10,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- 'start_time': 1,
- 'end_time': 9,
- }
- },
- {
- 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
- 'note': 'Test generic use_cipher_signature video (#897)',
- 'info_dict': {
- 'id': 'UxxajLWwzqY',
- 'ext': 'mp4',
- 'upload_date': '20120506',
- 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
- 'alt_title': 'I Love It (feat. Charli XCX)',
- 'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
- 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
- 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
- 'iconic ep', 'iconic', 'love', 'it'],
- 'duration': 180,
- 'uploader': 'Icona Pop',
- 'uploader_id': 'IconaPop',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
- 'creator': 'Icona Pop',
- 'track': 'I Love It (feat. Charli XCX)',
- 'artist': 'Icona Pop',
- }
- },
- {
- 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
- 'note': 'Test VEVO video with age protection (#956)',
- 'info_dict': {
- 'id': '07FYdnEawAQ',
- 'ext': 'mp4',
- 'upload_date': '20130703',
- 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
- 'alt_title': 'Tunnel Vision',
- 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
- 'duration': 419,
- 'uploader': 'justintimberlakeVEVO',
- 'uploader_id': 'justintimberlakeVEVO',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
- 'creator': 'Justin Timberlake',
- 'track': 'Tunnel Vision',
- 'artist': 'Justin Timberlake',
- 'age_limit': 18,
- }
- },
- {
- 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
- 'note': 'Embed-only video (#1746)',
- 'info_dict': {
- 'id': 'yZIXLfi8CZQ',
- 'ext': 'mp4',
- 'upload_date': '20120608',
- 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
- 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
- 'uploader': 'SET India',
- 'uploader_id': 'setindia',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
- 'age_limit': 18,
- }
- },
- {
- 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
- 'note': 'Use the first video ID in the URL',
- 'info_dict': {
- 'id': 'BaW_jenozKc',
- 'ext': 'mp4',
- 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
- 'uploader': 'Philipp Hagemeister',
- 'uploader_id': 'phihag',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
- 'upload_date': '20121002',
- 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
- 'categories': ['Science & Technology'],
- 'tags': ['youtube-dl'],
- 'duration': 10,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
- 'note': '256k DASH audio (format 141) via DASH manifest',
- 'info_dict': {
- 'id': 'a9LDPn-MO4I',
- 'ext': 'm4a',
- 'upload_date': '20121002',
- 'uploader_id': '8KVIDEO',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
- 'description': '',
- 'uploader': '8KVIDEO',
- 'title': 'UHDTV TEST 8K VIDEO.mp4'
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141',
- },
- 'skip': 'format 141 not served anymore',
- },
- # DASH manifest with encrypted signature
- {
- 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
- 'info_dict': {
- 'id': 'IB3lcPjvWLA',
- 'ext': 'm4a',
- 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
- 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
- 'duration': 244,
- 'uploader': 'AfrojackVEVO',
- 'uploader_id': 'AfrojackVEVO',
- 'upload_date': '20131011',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141/bestaudio[ext=m4a]',
- },
- },
- # JS player signature function name containing $
- {
- 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
- 'info_dict': {
- 'id': 'nfWlot6h_JM',
- 'ext': 'm4a',
- 'title': 'Taylor Swift - Shake It Off',
- 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
- 'duration': 242,
- 'uploader': 'TaylorSwiftVEVO',
- 'uploader_id': 'TaylorSwiftVEVO',
- 'upload_date': '20140818',
- 'creator': 'Taylor Swift',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '141/bestaudio[ext=m4a]',
- },
- },
- # Controversy video
- {
- 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
- 'info_dict': {
- 'id': 'T4XJQO3qol8',
- 'ext': 'mp4',
- 'duration': 219,
- 'upload_date': '20100909',
- 'uploader': 'Amazing Atheist',
- 'uploader_id': 'TheAmazingAtheist',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
- 'title': 'Burning Everyone\'s Koran',
- 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
- }
- },
- # Normal age-gate video (No vevo, embed allowed)
- {
- 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
- 'info_dict': {
- 'id': 'HtVdAasjOgU',
- 'ext': 'mp4',
- 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
- 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
- 'duration': 142,
- 'uploader': 'The Witcher',
- 'uploader_id': 'WitcherGame',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
- 'upload_date': '20140605',
- 'age_limit': 18,
- },
- },
- # Age-gate video with encrypted signature
- {
- 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
- 'info_dict': {
- 'id': '6kLq3WMV1nU',
- 'ext': 'mp4',
- 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
- 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
- 'duration': 246,
- 'uploader': 'LloydVEVO',
- 'uploader_id': 'LloydVEVO',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
- 'upload_date': '20110629',
- 'age_limit': 18,
- },
- },
- # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
- # YouTube Red ad is not captured for creator
- {
- 'url': '__2ABJjxzNo',
- 'info_dict': {
- 'id': '__2ABJjxzNo',
- 'ext': 'mp4',
- 'duration': 266,
- 'upload_date': '20100430',
- 'uploader_id': 'deadmau5',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
- 'creator': 'deadmau5',
- 'description': 'md5:12c56784b8032162bb936a5f76d55360',
- 'uploader': 'deadmau5',
- 'title': 'Deadmau5 - Some Chords (HD)',
- 'alt_title': 'Some Chords',
- },
- 'expected_warnings': [
- 'DASH manifest missing',
- ]
- },
- # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
- {
- 'url': 'lqQg6PlCWgI',
- 'info_dict': {
- 'id': 'lqQg6PlCWgI',
- 'ext': 'mp4',
- 'duration': 6085,
- 'upload_date': '20150827',
- 'uploader_id': 'olympic',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
- 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
- 'uploader': 'Olympic',
- 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
- },
- 'params': {
- 'skip_download': 'requires avconv',
- }
- },
- # Non-square pixels
- {
- 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
- 'info_dict': {
- 'id': '_b-2C3KPAM0',
- 'ext': 'mp4',
- 'stretched_ratio': 16 / 9.,
- 'duration': 85,
- 'upload_date': '20110310',
- 'uploader_id': 'AllenMeow',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
- 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
- 'uploader': '孫ᄋᄅ',
- 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
- },
- },
- # url_encoded_fmt_stream_map is empty string
- {
- 'url': 'qEJwOuvDf7I',
- 'info_dict': {
- 'id': 'qEJwOuvDf7I',
- 'ext': 'webm',
- 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
- 'description': '',
- 'upload_date': '20150404',
- 'uploader_id': 'spbelect',
- 'uploader': 'Наблюдатели Петербурга',
- },
- 'params': {
- 'skip_download': 'requires avconv',
- },
- 'skip': 'This live event has ended.',
- },
- # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
- {
- 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
- 'info_dict': {
- 'id': 'FIl7x6_3R5Y',
- 'ext': 'webm',
- 'title': 'md5:7b81415841e02ecd4313668cde88737a',
- 'description': 'md5:116377fd2963b81ec4ce64b542173306',
- 'duration': 220,
- 'upload_date': '20150625',
- 'uploader_id': 'dorappi2000',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
- 'uploader': 'dorappi2000',
- 'formats': 'mincount:31',
- },
- 'skip': 'no longer relevant',
- },
- # DASH manifest with segment_list
- {
- 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
- 'md5': '8ce563a1d667b599d21064e982ab9e31',
- 'info_dict': {
- 'id': 'CsmdDsKjzN8',
- 'ext': 'mp4',
- 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
- 'uploader': 'Airtek',
- 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
- 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
- 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
- },
- 'params': {
- 'youtube_include_dash_manifest': True,
- 'format': '135', # bestvideo
- },
- 'skip': 'This live event has ended.',
- },
- {
- # Multifeed videos (multiple cameras), URL is for Main Camera
- 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
- 'info_dict': {
- 'id': 'jqWvoWXjCVs',
- 'title': 'teamPGP: Rocket League Noob Stream',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- },
- 'playlist': [{
- 'info_dict': {
- 'id': 'jqWvoWXjCVs',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7335,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }, {
- 'info_dict': {
- 'id': '6h8e8xoXJzg',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7337,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }, {
- 'info_dict': {
- 'id': 'PUOgX5z9xZw',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7337,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }, {
- 'info_dict': {
- 'id': 'teuwxikvS5k',
- 'ext': 'mp4',
- 'title': 'teamPGP: Rocket League Noob Stream (zim)',
- 'description': 'md5:dc7872fb300e143831327f1bae3af010',
- 'duration': 7334,
- 'upload_date': '20150721',
- 'uploader': 'Beer Games Beer',
- 'uploader_id': 'beergamesbeer',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
- 'license': 'Standard YouTube License',
- },
- }],
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is not available.',
- },
- {
- # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
- 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
- 'info_dict': {
- 'id': 'gVfLd0zydlo',
- 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
- },
- 'playlist_count': 2,
- 'skip': 'Not multifeed anymore',
- },
- {
- 'url': 'https://vid.plus/FlRa-iH7PGw',
- 'only_matching': True,
- },
- {
- 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
- 'only_matching': True,
- },
- {
- # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
- # Also tests cut-off URL expansion in video description (see
- # https://github.com/ytdl-org/youtube-dl/issues/1892,
- # https://github.com/ytdl-org/youtube-dl/issues/8164)
- 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
- 'info_dict': {
- 'id': 'lsguqyKfVQg',
- 'ext': 'mp4',
- 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
- 'alt_title': 'Dark Walk - Position Music',
- 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
- 'duration': 133,
- 'upload_date': '20151119',
- 'uploader_id': 'IronSoulElf',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
- 'uploader': 'IronSoulElf',
- 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
- 'track': 'Dark Walk - Position Music',
- 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
- 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
- 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
- 'only_matching': True,
- },
- {
- # Video with yt:stretch=17:0
- 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
- 'info_dict': {
- 'id': 'Q39EVAstoRM',
- 'ext': 'mp4',
- 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
- 'description': 'md5:ee18a25c350637c8faff806845bddee9',
- 'upload_date': '20151107',
- 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
- 'uploader': 'CH GAMER DROID',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video does not exist.',
- },
- {
- # Video licensed under Creative Commons
- 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
- 'info_dict': {
- 'id': 'M4gD1WSo5mA',
- 'ext': 'mp4',
- 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
- 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
- 'duration': 721,
- 'upload_date': '20150127',
- 'uploader_id': 'BerkmanCenter',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
- 'uploader': 'The Berkman Klein Center for Internet & Society',
- 'license': 'Creative Commons Attribution license (reuse allowed)',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Channel-like uploader_url
- 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
- 'info_dict': {
- 'id': 'eQcmzGIKrzg',
- 'ext': 'mp4',
- 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
- 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
- 'duration': 4060,
- 'upload_date': '20151119',
- 'uploader': 'Bernie Sanders',
- 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
- 'license': 'Creative Commons Attribution license (reuse allowed)',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
- 'only_matching': True,
- },
- {
- # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
- 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
- 'only_matching': True,
- },
- {
- # Rental video preview
- 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
- 'info_dict': {
- 'id': 'uGpuVWrhIzE',
- 'ext': 'mp4',
- 'title': 'Piku - Trailer',
- 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
- 'upload_date': '20150811',
- 'uploader': 'FlixMatrix',
- 'uploader_id': 'FlixMatrixKaravan',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
- 'license': 'Standard YouTube License',
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is not available.',
- },
- {
- # YouTube Red video with episode data
- 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
- 'info_dict': {
- 'id': 'iqKdEhx-dD4',
- 'ext': 'mp4',
- 'title': 'Isolation - Mind Field (Ep 1)',
- 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
- 'duration': 2085,
- 'upload_date': '20170118',
- 'uploader': 'Vsauce',
- 'uploader_id': 'Vsauce',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
- 'series': 'Mind Field',
- 'season_number': 1,
- 'episode_number': 1,
- },
- 'params': {
- 'skip_download': True,
- },
- 'expected_warnings': [
- 'Skipping DASH manifest',
- ],
- },
- {
- # The following content has been identified by the YouTube community
- # as inappropriate or offensive to some audiences.
- 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
- 'info_dict': {
- 'id': '6SJNVb0GnPI',
- 'ext': 'mp4',
- 'title': 'Race Differences in Intelligence',
- 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
- 'duration': 965,
- 'upload_date': '20140124',
- 'uploader': 'New Century Foundation',
- 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # itag 212
- 'url': '1t24XAntNCY',
- 'only_matching': True,
- },
- {
- # geo restricted to JP
- 'url': 'sJL6WA-aGkQ',
- 'only_matching': True,
- },
- {
- 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
- 'only_matching': True,
- },
- {
- 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
- 'only_matching': True,
- },
- {
- # DRM protected
- 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
- 'only_matching': True,
- },
- {
- # Video with unsupported adaptive stream type formats
- 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
- 'info_dict': {
- 'id': 'Z4Vy8R84T1U',
- 'ext': 'mp4',
- 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
- 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
- 'duration': 433,
- 'upload_date': '20130923',
- 'uploader': 'Amelia Putri Harwita',
- 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
- 'formats': 'maxcount:10',
- },
- 'params': {
- 'skip_download': True,
- 'youtube_include_dash_manifest': False,
- },
- },
- {
- # Youtube Music Auto-generated description
- 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
- 'info_dict': {
- 'id': 'MgNrAu2pzNs',
- 'ext': 'mp4',
- 'title': 'Voyeur Girl',
- 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
- 'upload_date': '20190312',
- 'uploader': 'Various Artists - Topic',
- 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
- 'artist': 'Stephen',
- 'track': 'Voyeur Girl',
- 'album': 'it\'s too much love to know my dear',
- 'release_date': '20190313',
- 'release_year': 2019,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # Retrieve 'artist' field from 'Artist:' in video description
- # when it is present on youtube music video
- 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
- 'info_dict': {
- 'id': 'k0jLE7tTwjY',
- 'ext': 'mp4',
- 'title': 'Latch Feat. Sam Smith',
- 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
- 'upload_date': '20150110',
- 'uploader': 'Various Artists - Topic',
- 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
- 'artist': 'Disclosure',
- 'track': 'Latch Feat. Sam Smith',
- 'album': 'Latch Featuring Sam Smith',
- 'release_date': '20121008',
- 'release_year': 2012,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # handle multiple artists on youtube music video
- 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
- 'info_dict': {
- 'id': '74qn0eJSjpA',
- 'ext': 'mp4',
- 'title': 'Eastside',
- 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
- 'upload_date': '20180710',
- 'uploader': 'Benny Blanco - Topic',
- 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
- 'artist': 'benny blanco, Halsey, Khalid',
- 'track': 'Eastside',
- 'album': 'Eastside',
- 'release_date': '20180713',
- 'release_year': 2018,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- {
- # Youtube Music Auto-generated description
- # handle youtube music video with release_year and no release_date
- 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
- 'info_dict': {
- 'id': '-hcAI0g-f5M',
- 'ext': 'mp4',
- 'title': 'Put It On Me',
- 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
- 'upload_date': '20180426',
- 'uploader': 'Matt Maeson - Topic',
- 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
- 'artist': 'Matt Maeson',
- 'track': 'Put It On Me',
- 'album': 'The Hearse',
- 'release_date': None,
- 'release_year': 2018,
- },
- 'params': {
- 'skip_download': True,
- },
- },
- ]
-
- def __init__(self, *args, **kwargs):
- super(YoutubeIE, self).__init__(*args, **kwargs)
- self._player_cache = {}
-
- def report_video_info_webpage_download(self, video_id):
- """Report attempt to download video info webpage."""
- self.to_screen('%s: Downloading video info webpage' % video_id)
-
- def report_information_extraction(self, video_id):
- """Report attempt to extract video information."""
- self.to_screen('%s: Extracting video information' % video_id)
-
- def report_unavailable_format(self, video_id, format):
- """Report extracted video URL."""
- self.to_screen('%s: Format %s not available' % (video_id, format))
-
- def report_rtmp_download(self):
- """Indicate the download will use the RTMP protocol."""
- self.to_screen('RTMP download detected')
-
- def _signature_cache_id(self, example_sig):
- """ Return a string representation of a signature """
- return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
-
- def _extract_signature_function(self, video_id, player_url, example_sig):
- id_m = re.match(
- r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
- player_url)
- if not id_m:
- raise ExtractorError('Cannot identify player %r' % player_url)
- player_type = id_m.group('ext')
- player_id = id_m.group('id')
-
- # Read from filesystem cache
- func_id = '%s_%s_%s' % (
- player_type, player_id, self._signature_cache_id(example_sig))
- assert os.path.basename(func_id) == func_id
-
- cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
- if cache_spec is not None:
- return lambda s: ''.join(s[i] for i in cache_spec)
-
- download_note = (
- 'Downloading player %s' % player_url
- if self._downloader.params.get('verbose') else
- 'Downloading %s player %s' % (player_type, player_id)
- )
- if player_type == 'js':
- code = self._download_webpage(
- player_url, video_id,
- note=download_note,
- errnote='Download of %s failed' % player_url)
- res = self._parse_sig_js(code)
- elif player_type == 'swf':
- urlh = self._request_webpage(
- player_url, video_id,
- note=download_note,
- errnote='Download of %s failed' % player_url)
- code = urlh.read()
- res = self._parse_sig_swf(code)
- else:
- assert False, 'Invalid player type %r' % player_type
-
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = res(test_string)
- cache_spec = [ord(c) for c in cache_res]
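- # The probe string maps position i to chr(i), so the ordinals of the
- # transformed string record which input index feeds each output position.
- # This assumes the player function only reorders/drops characters, which
- # is what the cache format relies on.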
-
- self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
- return res
-
- def _print_sig_code(self, func, example_sig):
- def gen_sig_code(idxs):
- def _genslice(start, end, step):
- starts = '' if start == 0 else str(start)
- ends = (':%d' % (end + step)) if end + step >= 0 else ':'
- steps = '' if step == 1 else (':%d' % step)
- return 's[%s%s%s]' % (starts, ends, steps)
-
- step = None
- # Quell pyflakes warnings - start will be set when step is set
- start = '(Never used)'
- for i, prev in zip(idxs[1:], idxs[:-1]):
- if step is not None:
- if i - prev == step:
- continue
- yield _genslice(start, prev, step)
- step = None
- continue
- if i - prev in [-1, 1]:
- step = i - prev
- start = prev
- continue
- else:
- yield 's[%d]' % prev
- if step is None:
- yield 's[%d]' % i
- else:
- yield _genslice(start, i, step)
-
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = func(test_string)
- cache_spec = [ord(c) for c in cache_res]
- expr_code = ' + '.join(gen_sig_code(cache_spec))
- signature_id_tuple = '(%s)' % (
- ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
- code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
- ' return %s\n') % (signature_id_tuple, expr_code)
- self.to_screen('Extracted signature function:\n' + code)
-
- def _parse_sig_js(self, jscode):
- funcname = self._search_regex(
- (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
- # Obsolete patterns
- r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
- r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
- r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
- jscode, 'Initial JS player signature function name', group='sig')
-
- jsi = JSInterpreter(jscode)
- initial_function = jsi.extract_function(funcname)
- return lambda s: initial_function([s])
-
- def _parse_sig_swf(self, file_contents):
- swfi = SWFInterpreter(file_contents)
- TARGET_CLASSNAME = 'SignatureDecipher'
- searched_class = swfi.extract_class(TARGET_CLASSNAME)
- initial_function = swfi.extract_function(searched_class, 'decipher')
- return lambda s: initial_function([s])
-
- def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
- """Turn the encrypted s field into a working signature"""
-
- if player_url is None:
- raise ExtractorError('Cannot decrypt signature without player_url')
-
- if player_url.startswith('//'):
- player_url = 'https:' + player_url
- elif not re.match(r'https?://', player_url):
- player_url = compat_urlparse.urljoin(
- 'https://www.youtube.com', player_url)
- try:
- player_id = (player_url, self._signature_cache_id(s))
- if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, s
- )
- self._player_cache[player_id] = func
- func = self._player_cache[player_id]
- if self._downloader.params.get('youtube_print_sig_code'):
- self._print_sig_code(func, s)
- return func(s)
- except Exception as e:
- tb = traceback.format_exc()
- raise ExtractorError(
- 'Signature extraction failed: ' + tb, cause=e)
-
- def _get_subtitles(self, video_id, webpage):
- try:
- subs_doc = self._download_xml(
- 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
- video_id, note=False)
- except ExtractorError as err:
- self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
- return {}
-
- sub_lang_list = {}
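- # Each <track> element of the timedtext listing describes one available
- # subtitle track; automatic (ASR) captions are handled separately below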
- for track in subs_doc.findall('track'):
- lang = track.attrib['lang_code']
- if lang in sub_lang_list:
- continue
- sub_formats = []
- for ext in self._SUBTITLE_FORMATS:
- params = compat_urllib_parse_urlencode({
- 'lang': lang,
- 'v': video_id,
- 'fmt': ext,
- 'name': track.attrib['name'].encode('utf-8'),
- })
- sub_formats.append({
- 'url': 'https://www.youtube.com/api/timedtext?' + params,
- 'ext': ext,
- })
- sub_lang_list[lang] = sub_formats
- if not sub_lang_list:
- self._downloader.report_warning('video doesn\'t have subtitles')
- return {}
- return sub_lang_list
-
- def _get_ytplayer_config(self, video_id, webpage):
- patterns = (
- # User data may contain arbitrary character sequences that may affect
- # JSON extraction with regex, e.g. when '};' is contained the second
- # regex won't capture the whole JSON. Work around this by trying the more
- # specific regex first; proper quoted string handling, to be implemented
- # in the future, will replace this workaround (see
- # https://github.com/ytdl-org/youtube-dl/issues/7468,
- # https://github.com/ytdl-org/youtube-dl/pull/7599)
- r';ytplayer\.config\s*=\s*({.+?});ytplayer',
- r';ytplayer\.config\s*=\s*({.+?});',
- )
- config = self._search_regex(
- patterns, webpage, 'ytplayer.config', default=None)
- if config:
- return self._parse_json(
- uppercase_escape(config), video_id, fatal=False)
-
- def _get_automatic_captions(self, video_id, webpage):
- """We need the webpage for getting the captions url, pass it as an
- argument to speed up the process."""
- self.to_screen('%s: Looking for automatic captions' % video_id)
- player_config = self._get_ytplayer_config(video_id, webpage)
- err_msg = 'Couldn\'t find automatic captions for %s' % video_id
- if not player_config:
- self._downloader.report_warning(err_msg)
- return {}
- try:
- args = player_config['args']
- caption_url = args.get('ttsurl')
- if caption_url:
- timestamp = args['timestamp']
- # We get the available subtitles
- list_params = compat_urllib_parse_urlencode({
- 'type': 'list',
- 'tlangs': 1,
- 'asrs': 1,
- })
- list_url = caption_url + '&' + list_params
- caption_list = self._download_xml(list_url, video_id)
- original_lang_node = caption_list.find('track')
- if original_lang_node is None:
- self._downloader.report_warning('Video doesn\'t have automatic captions')
- return {}
- original_lang = original_lang_node.attrib['lang_code']
- caption_kind = original_lang_node.attrib.get('kind', '')
-
- sub_lang_list = {}
- for lang_node in caption_list.findall('target'):
- sub_lang = lang_node.attrib['lang_code']
- sub_formats = []
- for ext in self._SUBTITLE_FORMATS:
- params = compat_urllib_parse_urlencode({
- 'lang': original_lang,
- 'tlang': sub_lang,
- 'fmt': ext,
- 'ts': timestamp,
- 'kind': caption_kind,
- })
- sub_formats.append({
- 'url': caption_url + '&' + params,
- 'ext': ext,
- })
- sub_lang_list[sub_lang] = sub_formats
- return sub_lang_list
-
- def make_captions(sub_url, sub_langs):
- parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
- caption_qs = compat_parse_qs(parsed_sub_url.query)
- captions = {}
- for sub_lang in sub_langs:
- sub_formats = []
- for ext in self._SUBTITLE_FORMATS:
- caption_qs.update({
- 'tlang': [sub_lang],
- 'fmt': [ext],
- })
- sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
- query=compat_urllib_parse_urlencode(caption_qs, True)))
- sub_formats.append({
- 'url': sub_url,
- 'ext': ext,
- })
- captions[sub_lang] = sub_formats
- return captions
-
- # New captions format as of 22.06.2017
- player_response = args.get('player_response')
- if player_response and isinstance(player_response, compat_str):
- player_response = self._parse_json(
- player_response, video_id, fatal=False)
- if player_response:
- renderer = player_response['captions']['playerCaptionsTracklistRenderer']
- base_url = renderer['captionTracks'][0]['baseUrl']
- sub_lang_list = []
- for lang in renderer['translationLanguages']:
- lang_code = lang.get('languageCode')
- if lang_code:
- sub_lang_list.append(lang_code)
- return make_captions(base_url, sub_lang_list)
-
- # Some videos don't provide ttsurl but rather caption_tracks and
- # caption_translation_languages (e.g. 20LmZk1hakA)
- # Not used anymore as of 22.06.2017
- caption_tracks = args['caption_tracks']
- caption_translation_languages = args['caption_translation_languages']
- caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
- sub_lang_list = []
- for lang in caption_translation_languages.split(','):
- lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
- sub_lang = lang_qs.get('lc', [None])[0]
- if sub_lang:
- sub_lang_list.append(sub_lang)
- return make_captions(caption_url, sub_lang_list)
- # An extractor error can be raised by the download process if there are
- # no automatic captions but there are subtitles
- except (KeyError, IndexError, ExtractorError):
- self._downloader.report_warning(err_msg)
- return {}
-
- def _mark_watched(self, video_id, video_info, player_response):
- playback_url = url_or_none(try_get(
- player_response,
- lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
- video_info, lambda x: x['videostats_playback_base_url'][0]))
- if not playback_url:
- return
- parsed_playback_url = compat_urlparse.urlparse(playback_url)
- qs = compat_urlparse.parse_qs(parsed_playback_url.query)
-
- # The cpn generation algorithm is reverse engineered from base.js.
- # In fact it works even with a dummy cpn.
- CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
- cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
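- # The result is a 16-character nonce over the 64-symbol (URL-safe
- # base64 style) alphabet; masking with & 63 keeps each index in range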
-
- qs.update({
- 'ver': ['2'],
- 'cpn': [cpn],
- })
- playback_url = compat_urlparse.urlunparse(
- parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
-
- self._download_webpage(
- playback_url, video_id, 'Marking watched',
- 'Unable to mark watched', fatal=False)
-
- @staticmethod
- def _extract_urls(webpage):
- # Embedded YouTube player
- entries = [
- unescapeHTML(mobj.group('url'))
- for mobj in re.finditer(r'''(?x)
- (?:
- <iframe[^>]+?src=|
- data-video-url=|
- <embed[^>]+?src=|
- embedSWF\(?:\s*|
- <object[^>]+data=|
- new\s+SWFObject\(
- )
- (["\'])
- (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
- (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
- \1''', webpage)]
-
- # lazyYT YouTube embed
- entries.extend(list(map(
- unescapeHTML,
- re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
-
- # Wordpress "YouTube Video Importer" plugin
- matches = re.findall(r'''(?x)<div[^>]+
- class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
- data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
- entries.extend(m[-1] for m in matches)
-
- return entries
-
- @staticmethod
- def _extract_url(webpage):
- urls = YoutubeIE._extract_urls(webpage)
- return urls[0] if urls else None
-
- @classmethod
- def extract_id(cls, url):
- mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- video_id = mobj.group(2)
- return video_id
-
- @staticmethod
- def _extract_chapters(description, duration):
- if not description:
- return None
- chapter_lines = re.findall(
- r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
- description)
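- # Each match is a (chapter line, timestamp text) pair taken from the
- # old-style description markup with yt.www.watch.player.seekTo links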
- if not chapter_lines:
- return None
- chapters = []
- for next_num, (chapter_line, time_point) in enumerate(
- chapter_lines, start=1):
- start_time = parse_duration(time_point)
- if start_time is None:
- continue
- if start_time > duration:
- break
- end_time = (duration if next_num == len(chapter_lines)
- else parse_duration(chapter_lines[next_num][1]))
- if end_time is None:
- continue
- if end_time > duration:
- end_time = duration
- if start_time > end_time:
- break
- chapter_title = re.sub(
- r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
- chapter_title = re.sub(r'\s+', ' ', chapter_title)
- chapters.append({
- 'start_time': start_time,
- 'end_time': end_time,
- 'title': chapter_title,
- })
- return chapters
-
- def _real_extract(self, url):
- url, smuggled_data = unsmuggle_url(url, {})
-
- proto = (
- 'http' if self._downloader.params.get('prefer_insecure', False)
- else 'https')
-
- start_time = None
- end_time = None
- parsed_url = compat_urllib_parse_urlparse(url)
- for component in [parsed_url.fragment, parsed_url.query]:
- query = compat_parse_qs(component)
- if start_time is None and 't' in query:
- start_time = parse_duration(query['t'][0])
- if start_time is None and 'start' in query:
- start_time = parse_duration(query['start'][0])
- if end_time is None and 'end' in query:
- end_time = parse_duration(query['end'][0])
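- # e.g. a '#t=1m30s' fragment or 't=1m30s' query parameter
- # yields start_time = 90 (seconds)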
-
- # Extract the original video URL from a redirecting URL (e.g. age verification) using the next_url parameter
- mobj = re.search(self._NEXT_URL_RE, url)
- if mobj:
- url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
- video_id = self.extract_id(url)
-
- # Get video webpage
- url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
- video_webpage = self._download_webpage(url, video_id)
-
- # Attempt to extract SWF player URL
- mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
- if mobj is not None:
- player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
- else:
- player_url = None
-
- dash_mpds = []
-
- def add_dash_mpd(video_info):
- dash_mpd = video_info.get('dashmpd')
- if dash_mpd and dash_mpd[0] not in dash_mpds:
- dash_mpds.append(dash_mpd[0])
-
- def add_dash_mpd_pr(pl_response):
- dash_mpd = url_or_none(try_get(
- pl_response, lambda x: x['streamingData']['dashManifestUrl'],
- compat_str))
- if dash_mpd and dash_mpd not in dash_mpds:
- dash_mpds.append(dash_mpd)
-
- is_live = None
- view_count = None
-
- def extract_view_count(v_info):
- return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
-
- def extract_token(v_info):
- return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
-
- def extract_player_response(player_response, video_id):
- pl_response = str_or_none(player_response)
- if not pl_response:
- return
- pl_response = self._parse_json(pl_response, video_id, fatal=False)
- if isinstance(pl_response, dict):
- add_dash_mpd_pr(pl_response)
- return pl_response
-
- player_response = {}
-
- # Get video info
- embed_webpage = None
- if re.search(r'player-age-gate-content">', video_webpage) is not None:
- age_gate = True
- # We simulate access to the video from www.youtube.com/v/{video_id},
- # which can be viewed without logging into Youtube
- url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
- data = compat_urllib_parse_urlencode({
- 'video_id': video_id,
- 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
- 'sts': self._search_regex(
- r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
- })
- video_info_url = proto + '://www.youtube.com/get_video_info?' + data
- video_info_webpage = self._download_webpage(
- video_info_url, video_id,
- note='Refetching age-gated info webpage',
- errnote='unable to download video info webpage')
- video_info = compat_parse_qs(video_info_webpage)
- pl_response = video_info.get('player_response', [None])[0]
- player_response = extract_player_response(pl_response, video_id)
- add_dash_mpd(video_info)
- view_count = extract_view_count(video_info)
- else:
- age_gate = False
- video_info = None
- sts = None
- # Try looking directly into the video webpage
- ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
- if ytplayer_config:
- args = ytplayer_config['args']
- if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
- # Convert to the same format returned by compat_parse_qs
- video_info = dict((k, [v]) for k, v in args.items())
- add_dash_mpd(video_info)
- # Rental video is not rented but a preview is available (e.g.
- # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
- # https://github.com/ytdl-org/youtube-dl/issues/10532)
- if not video_info and args.get('ypc_vid'):
- return self.url_result(
- args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
- if args.get('livestream') == '1' or args.get('live_playback') == 1:
- is_live = True
- sts = ytplayer_config.get('sts')
- if not player_response:
- player_response = extract_player_response(args.get('player_response'), video_id)
- if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
- add_dash_mpd_pr(player_response)
- # We also try looking in get_video_info since it may contain a different dashmpd
- # URL that points to a DASH manifest with a possibly different itag set (some itags
- # are missing from the DASH manifest pointed to by the webpage's dashmpd, others from
- # the DASH manifest pointed to by get_video_info's dashmpd).
- # The general idea is to take the union of itags from both DASH manifests (for an
- # example of a video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
- self.report_video_info_webpage_download(video_id)
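- # Try several 'el' contexts in turn; different ones succeed for
- # different videos (see the geo restriction notes below)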
- for el in ('embedded', 'detailpage', 'vevo', ''):
- query = {
- 'video_id': video_id,
- 'ps': 'default',
- 'eurl': '',
- 'gl': 'US',
- 'hl': 'en',
- }
- if el:
- query['el'] = el
- if sts:
- query['sts'] = sts
- video_info_webpage = self._download_webpage(
- '%s://www.youtube.com/get_video_info' % proto,
- video_id, note=False,
- errnote='unable to download video info webpage',
- fatal=False, query=query)
- if not video_info_webpage:
- continue
- get_video_info = compat_parse_qs(video_info_webpage)
- if not player_response:
- pl_response = get_video_info.get('player_response', [None])[0]
- player_response = extract_player_response(pl_response, video_id)
- add_dash_mpd(get_video_info)
- if view_count is None:
- view_count = extract_view_count(get_video_info)
- if not video_info:
- video_info = get_video_info
- get_token = extract_token(get_video_info)
- if get_token:
- # Different get_video_info requests may report different results, e.g.
- # some may report video unavailability, but some may serve it without
- # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
- # the original webpage as well as el=info and el=embedded get_video_info
- # requests report video unavailability due to geo restriction while
- # el=detailpage succeeds and returns valid data). This is probably
- # due to YouTube measures against IP ranges of hosting providers.
- # Work around this by preferring the first successful video_info that
- # contains the token if no such video_info has been found yet.
- token = extract_token(video_info)
- if not token:
- video_info = get_video_info
- break
-
- def extract_unavailable_message():
- messages = []
- for tag, kind in (('h1', 'message'), ('div', 'submessage')):
- msg = self._html_search_regex(
- r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
- video_webpage, 'unavailable %s' % kind, default=None)
- if msg:
- messages.append(msg)
- if messages:
- return '\n'.join(messages)
-
- if not video_info:
- unavailable_message = extract_unavailable_message()
- if not unavailable_message:
- unavailable_message = 'Unable to extract video data'
- raise ExtractorError(
- 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
-
- video_details = try_get(
- player_response, lambda x: x['videoDetails'], dict) or {}
-
- video_title = video_info.get('title', [None])[0] or video_details.get('title')
- if not video_title:
- self._downloader.report_warning('Unable to extract video title')
- video_title = '_'
-
- description_original = video_description = get_element_by_id("eow-description", video_webpage)
- if video_description:
-
- def replace_url(m):
- redir_url = compat_urlparse.urljoin(url, m.group(1))
- parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
- if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
- qs = compat_parse_qs(parsed_redir_url.query)
- q = qs.get('q')
- if q and q[0]:
- return q[0]
- return redir_url
-
- description_original = video_description = re.sub(r'''(?x)
- <a\s+
- (?:[a-zA-Z-]+="[^"]*"\s+)*?
- (?:title|href)="([^"]+)"\s+
- (?:[a-zA-Z-]+="[^"]*"\s+)*?
- class="[^"]*"[^>]*>
- [^<]+\.{3}\s*
- </a>
- ''', replace_url, video_description)
- video_description = clean_html(video_description)
- else:
- video_description = self._html_search_meta('description', video_webpage) or video_details.get('shortDescription')
-
- if not smuggled_data.get('force_singlefeed', False):
- if not self._downloader.params.get('noplaylist'):
- multifeed_metadata_list = try_get(
- player_response,
- lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
- compat_str) or try_get(
- video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
- if multifeed_metadata_list:
- entries = []
- feed_ids = []
- for feed in multifeed_metadata_list.split(','):
- # Unquoting should take place before splitting on comma (,) since textual
- # fields may contain commas as well (see
- # https://github.com/ytdl-org/youtube-dl/issues/8536)
- feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
- entries.append({
- '_type': 'url_transparent',
- 'ie_key': 'Youtube',
- 'url': smuggle_url(
- '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
- {'force_singlefeed': True}),
- 'title': '%s (%s)' % (video_title, feed_data['title'][0]),
- })
- feed_ids.append(feed_data['id'][0])
- self.to_screen(
- 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
- % (', '.join(feed_ids), video_id))
- return self.playlist_result(entries, video_id, video_title, video_description)
- else:
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
-
- if view_count is None:
- view_count = extract_view_count(video_info)
- if view_count is None and video_details:
- view_count = int_or_none(video_details.get('viewCount'))
-
- if is_live is None:
- is_live = bool_or_none(video_details.get('isLive'))
-
- # Check for "rental" videos
- if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
- raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
-
- def _extract_filesize(media_url):
- return int_or_none(self._search_regex(
- r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
-
- streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
- streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
-
- if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
- self.report_rtmp_download()
- formats = [{
- 'format_id': '_rtmp',
- 'protocol': 'rtmp',
- 'url': video_info['conn'][0],
- 'player_url': player_url,
- }]
- elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
- encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
- if 'rtmpe%3Dyes' in encoded_url_map:
- raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
- formats = []
- formats_spec = {}
- fmt_list = video_info.get('fmt_list', [''])[0]
- if fmt_list:
- for fmt in fmt_list.split(','):
- spec = fmt.split('/')
- if len(spec) > 1:
- width_height = spec[1].split('x')
- if len(width_height) == 2:
- formats_spec[spec[0]] = {
- 'resolution': spec[1],
- 'width': int_or_none(width_height[0]),
- 'height': int_or_none(width_height[1]),
- }
- for fmt in streaming_formats:
- itag = str_or_none(fmt.get('itag'))
- if not itag:
- continue
- quality = fmt.get('quality')
- quality_label = fmt.get('qualityLabel') or quality
- formats_spec[itag] = {
- 'asr': int_or_none(fmt.get('audioSampleRate')),
- 'filesize': int_or_none(fmt.get('contentLength')),
- 'format_note': quality_label,
- 'fps': int_or_none(fmt.get('fps')),
- 'height': int_or_none(fmt.get('height')),
- # bitrate for itag 43 is always 2147483647
- 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
- 'width': int_or_none(fmt.get('width')),
- }
-
- for fmt in streaming_formats:
- if fmt.get('drm_families'):
- continue
- url = url_or_none(fmt.get('url'))
-
- if not url:
- cipher = fmt.get('cipher')
- if not cipher:
- continue
- url_data = compat_parse_qs(cipher)
- url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
- if not url:
- continue
- else:
- cipher = None
- url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
-
- stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
- # Unsupported FORMAT_STREAM_TYPE_OTF
- if stream_type == 3:
- continue
-
- format_id = fmt.get('itag') or url_data['itag'][0]
- if not format_id:
- continue
- format_id = compat_str(format_id)
-
- if cipher:
- if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
- jsplayer_url_json = self._search_regex(
- ASSETS_RE,
- embed_webpage if age_gate else video_webpage,
- 'JS player URL (1)', default=None)
- if not jsplayer_url_json and not age_gate:
- # We need the embed website after all
- if embed_webpage is None:
- embed_url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage')
- jsplayer_url_json = self._search_regex(
- ASSETS_RE, embed_webpage, 'JS player URL')
-
- player_url = json.loads(jsplayer_url_json)
- if player_url is None:
- player_url_json = self._search_regex(
- r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
- video_webpage, 'age gate player URL')
- player_url = json.loads(player_url_json)
-
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
-
- if self._downloader.params.get('verbose'):
- if player_url is None:
- player_version = 'unknown'
- player_desc = 'unknown'
- else:
- if player_url.endswith('swf'):
- player_version = self._search_regex(
- r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
- 'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
- else:
- player_version = self._search_regex(
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
- r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
- player_url,
- 'html5 player', fatal=False)
- player_desc = 'html5 player %s' % player_version
-
- parts_sizes = self._signature_cache_id(encrypted_sig)
- self.to_screen('{%s} signature length %s, %s' %
- (format_id, parts_sizes, player_desc))
-
- signature = self._decrypt_signature(
- encrypted_sig, video_id, player_url, age_gate)
- sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
- url += '&%s=%s' % (sp, signature)
- if 'ratebypass' not in url:
- url += '&ratebypass=yes'
-
- dct = {
- 'format_id': format_id,
- 'url': url,
- 'player_url': player_url,
- }
- if format_id in self._formats:
- dct.update(self._formats[format_id])
- if format_id in formats_spec:
- dct.update(formats_spec[format_id])
-
- # Some itags are not included in the DASH manifest, thus the corresponding formats
- # lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
- # Try to extract metadata from the url_encoded_fmt_stream_map entry.
- mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
- width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
-
- if width is None:
- width = int_or_none(fmt.get('width'))
- if height is None:
- height = int_or_none(fmt.get('height'))
-
- filesize = int_or_none(url_data.get(
- 'clen', [None])[0]) or _extract_filesize(url)
-
- quality = url_data.get('quality', [None])[0] or fmt.get('quality')
- quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
-
- tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
- or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
- fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
-
- more_fields = {
- 'filesize': filesize,
- 'tbr': tbr,
- 'width': width,
- 'height': height,
- 'fps': fps,
- 'format_note': quality_label or quality,
- }
- for key, value in more_fields.items():
- if value:
- dct[key] = value
- type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
- if type_:
- type_split = type_.split(';')
- kind_ext = type_split[0].split('/')
- if len(kind_ext) == 2:
- kind, _ = kind_ext
- dct['ext'] = mimetype2ext(type_split[0])
- if kind in ('audio', 'video'):
- codecs = None
- for mobj in re.finditer(
- r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
- if mobj.group('key') == 'codecs':
- codecs = mobj.group('val')
- break
- if codecs:
- dct.update(parse_codecs(codecs))
- if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
- dct['downloader_options'] = {
- # Youtube throttles chunks >~10M
- 'http_chunk_size': 10485760,
- }
- formats.append(dct)
- else:
- manifest_url = (
- url_or_none(try_get(
- player_response,
- lambda x: x['streamingData']['hlsManifestUrl'],
- compat_str))
- or url_or_none(try_get(
- video_info, lambda x: x['hlsvp'][0], compat_str)))
- if manifest_url:
- formats = []
- m3u8_formats = self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4', fatal=False)
- for a_format in m3u8_formats:
- itag = self._search_regex(
- r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
- if itag:
- a_format['format_id'] = itag
- if itag in self._formats:
- dct = self._formats[itag].copy()
- dct.update(a_format)
- a_format = dct
- a_format['player_url'] = player_url
- # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
- a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
- formats.append(a_format)
- else:
- error_message = extract_unavailable_message()
- if not error_message:
- error_message = clean_html(try_get(
- player_response, lambda x: x['playabilityStatus']['reason'],
- compat_str))
- if not error_message:
- error_message = clean_html(
- try_get(video_info, lambda x: x['reason'][0], compat_str))
- if error_message:
- raise ExtractorError(error_message, expected=True)
- raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
-
- # uploader
- video_uploader = try_get(
- video_info, lambda x: x['author'][0],
- compat_str) or str_or_none(video_details.get('author'))
- if video_uploader:
- video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
- else:
- self._downloader.report_warning('unable to extract uploader name')
-
- # uploader_id
- video_uploader_id = None
- video_uploader_url = None
- mobj = re.search(
- r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
- video_webpage)
- if mobj is not None:
- video_uploader_id = mobj.group('uploader_id')
- video_uploader_url = mobj.group('uploader_url')
- else:
- self._downloader.report_warning('unable to extract uploader nickname')
-
- channel_id = (
- str_or_none(video_details.get('channelId'))
- or self._html_search_meta(
- 'channelId', video_webpage, 'channel id', default=None)
- or self._search_regex(
- r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
- video_webpage, 'channel id', default=None, group='id'))
- channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
-
- # thumbnail image
- # We first try to get a high quality image:
- m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
- video_webpage, re.DOTALL)
- if m_thumb is not None:
- video_thumbnail = m_thumb.group(1)
- elif 'thumbnail_url' not in video_info:
- self._downloader.report_warning('unable to extract video thumbnail')
- video_thumbnail = None
- else: # don't panic if we can't find it
- video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0])
-
- # upload date
- upload_date = self._html_search_meta(
- 'datePublished', video_webpage, 'upload date', default=None)
- if not upload_date:
- upload_date = self._search_regex(
- [r'(?s)id="eow-date.*?>(.*?)</span>',
- r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
- video_webpage, 'upload date', default=None)
- upload_date = unified_strdate(upload_date)
-
- video_license = self._html_search_regex(
- r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
- video_webpage, 'license', default=None)
-
- m_music = re.search(
- r'''(?x)
- <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
- <ul[^>]*>\s*
- <li>(?P<title>.+?)
- by (?P<creator>.+?)
- (?:
- \(.+?\)|
- <a[^>]*
- (?:
- \bhref=["\']/red[^>]*>| # drop possible
- >\s*Listen ad-free with YouTube Red # YouTube Red ad
- )
- .*?
- )?</li
- ''',
- video_webpage)
- if m_music:
- video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
- video_creator = clean_html(m_music.group('creator'))
- else:
- video_alt_title = video_creator = None
-
- def extract_meta(field):
- return self._html_search_regex(
- r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
- video_webpage, field, default=None)
-
- track = extract_meta('Song')
- artist = extract_meta('Artist')
- album = extract_meta('Album')
-
- # Youtube Music Auto-generated description
- release_date = release_year = None
- if video_description:
- mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
- if mobj:
- if not track:
- track = mobj.group('track').strip()
- if not artist:
- artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
- if not album:
- album = mobj.group('album').strip()
- release_year = mobj.group('release_year')
- release_date = mobj.group('release_date')
- if release_date:
- release_date = release_date.replace('-', '')
- if not release_year:
- release_year = int(release_date[:4])
- if release_year:
- release_year = int(release_year)
-
- m_episode = re.search(
- r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
- video_webpage)
- if m_episode:
- series = unescapeHTML(m_episode.group('series'))
- season_number = int(m_episode.group('season'))
- episode_number = int(m_episode.group('episode'))
- else:
- series = season_number = episode_number = None
-
- m_cat_container = self._search_regex(
- r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
- video_webpage, 'categories', default=None)
- if m_cat_container:
- category = self._html_search_regex(
- r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
- default=None)
- video_categories = None if category is None else [category]
- else:
- video_categories = None
-
- video_tags = [
- unescapeHTML(m.group('content'))
- for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
-
- def _extract_count(count_name):
- return str_to_int(self._search_regex(
- r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
- % re.escape(count_name),
- video_webpage, count_name, default=None))
-
- like_count = _extract_count('like')
- dislike_count = _extract_count('dislike')
-
- if view_count is None:
- view_count = str_to_int(self._search_regex(
- r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
- 'view count', default=None))
-
- average_rating = (
- float_or_none(video_details.get('averageRating'))
- or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
-
- # subtitles
- video_subtitles = self.extract_subtitles(video_id, video_webpage)
- automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
-
- video_duration = try_get(
- video_info, lambda x: int_or_none(x['length_seconds'][0]))
- if not video_duration:
- video_duration = int_or_none(video_details.get('lengthSeconds'))
- if not video_duration:
- video_duration = parse_duration(self._html_search_meta(
- 'duration', video_webpage, 'video duration'))
-
- # annotations
- video_annotations = None
- if self._downloader.params.get('writeannotations', False):
- xsrf_token = self._search_regex(
- r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
- video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
- invideo_url = try_get(
- player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
- if xsrf_token and invideo_url:
- xsrf_field_name = self._search_regex(
- r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
- video_webpage, 'xsrf field name',
- group='xsrf_field_name', default='session_token')
- video_annotations = self._download_webpage(
- self._proto_relative_url(invideo_url),
- video_id, note='Downloading annotations',
- errnote='Unable to download video annotations', fatal=False,
- data=urlencode_postdata({xsrf_field_name: xsrf_token}))
-
- chapters = self._extract_chapters(description_original, video_duration)
-
- # Look for the DASH manifest
- if self._downloader.params.get('youtube_include_dash_manifest', True):
- dash_mpd_fatal = True
- for mpd_url in dash_mpds:
- dash_formats = {}
- try:
- def decrypt_sig(mobj):
- s = mobj.group(1)
- dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
- return '/signature/%s' % dec_s
-
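- # The manifest URL may carry an encrypted signature in an /s/<sig> path
- # segment; replace it with a decrypted /signature/<sig> segment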
- mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
-
- for df in self._extract_mpd_formats(
- mpd_url, video_id, fatal=dash_mpd_fatal,
- formats_dict=self._formats):
- if not df.get('filesize'):
- df['filesize'] = _extract_filesize(df['url'])
- # Do not overwrite a DASH format found in some previous DASH manifest
- if df['format_id'] not in dash_formats:
- dash_formats[df['format_id']] = df
- # Additional DASH manifests may end up in HTTP Error 403, therefore
- # allow them to fail without a bug report message if some DASH manifest
- # has already succeeded. This is a temporary workaround to reduce the
- # burst of bug reports until we figure out the reason and whether it
- # can be fixed at all.
- dash_mpd_fatal = False
- except (ExtractorError, KeyError) as e:
- self.report_warning(
- 'Skipping DASH manifest: %r' % e, video_id)
- if dash_formats:
- # Remove the formats found through non-DASH extraction; they
- # contain less info and can be wrong, because we use
- # fixed values (for example the resolution). See
- # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
- # example.
- formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
- formats.extend(dash_formats.values())
-
- # Check for malformed aspect ratio
- stretched_m = re.search(
- r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
- video_webpage)
- if stretched_m:
- w = float(stretched_m.group('w'))
- h = float(stretched_m.group('h'))
- # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM the ratio is 17:0).
- # We only process valid ratios.
- if w > 0 and h > 0:
- ratio = w / h
- for f in formats:
- if f.get('vcodec') != 'none':
- f['stretched_ratio'] = ratio
-
- if not formats:
- token = extract_token(video_info)
- if not token:
- if 'reason' in video_info:
- if 'The uploader has not made this video available in your country.' in video_info['reason']:
- regions_allowed = self._html_search_meta(
- 'regionsAllowed', video_webpage, default=None)
- countries = regions_allowed.split(',') if regions_allowed else None
- self.raise_geo_restricted(
- msg=video_info['reason'][0], countries=countries)
- reason = video_info['reason'][0]
- if 'Invalid parameters' in reason:
- unavailable_message = extract_unavailable_message()
- if unavailable_message:
- reason = unavailable_message
- raise ExtractorError(
- 'YouTube said: %s' % reason,
- expected=True, video_id=video_id)
- else:
- raise ExtractorError(
- '"token" parameter not in video info for unknown reason',
- video_id=video_id)
-
- if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
- raise ExtractorError('This video is DRM protected.', expected=True)
-
- self._sort_formats(formats)
-
- self.mark_watched(video_id, video_info, player_response)
-
- return {
- 'id': video_id,
- 'uploader': video_uploader,
- 'uploader_id': video_uploader_id,
- 'uploader_url': video_uploader_url,
- 'channel_id': channel_id,
- 'channel_url': channel_url,
- 'upload_date': upload_date,
- 'license': video_license,
- 'creator': video_creator or artist,
- 'title': video_title,
- 'alt_title': video_alt_title or track,
- 'thumbnail': video_thumbnail,
- 'description': video_description,
- 'categories': video_categories,
- 'tags': video_tags,
- 'subtitles': video_subtitles,
- 'automatic_captions': automatic_captions,
- 'duration': video_duration,
- 'age_limit': 18 if age_gate else 0,
- 'annotations': video_annotations,
- 'chapters': chapters,
- 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
- 'view_count': view_count,
- 'like_count': like_count,
- 'dislike_count': dislike_count,
- 'average_rating': average_rating,
- 'formats': formats,
- 'is_live': is_live,
- 'start_time': start_time,
- 'end_time': end_time,
- 'series': series,
- 'season_number': season_number,
- 'episode_number': episode_number,
- 'track': track,
- 'artist': artist,
- 'album': album,
- 'release_date': release_date,
- 'release_year': release_year,
- }
-
-
-class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
- IE_DESC = 'YouTube.com playlists'
- _VALID_URL = r"""(?x)(?:
- (?:https?://)?
- (?:\w+\.)?
- (?:
- (?:
- youtube\.com|
- invidio\.us
- )
- /
- (?:
- (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
- \? (?:.*?[&;])*? (?:p|a|list)=
- | p/
- )|
- youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
- )
- (
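- # Known prefixes include PL (ordinary playlists), UU (channel
- # uploads), FL (favourites), RD (mixes) and OLAK5uy_ (album
- # playlists); this list is presumably not exhaustive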
- (?:PL|LL|EC|UU|FL|RD|UL|TL|OLAK5uy_)?[0-9A-Za-z-_]{10,}
- # Top tracks, they can also include dots
- |(?:MC)[\w\.]*
- )
- .*
- |
- (%(playlist_id)s)
- )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
- _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
- _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
- _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
- IE_NAME = 'youtube:playlist'
- _TESTS = [{
- 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
- 'info_dict': {
- 'title': 'ytdl test PL',
- 'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
- },
- 'playlist_count': 3,
- }, {
- 'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
- 'info_dict': {
- 'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
- 'title': 'YDL_Empty_List',
- },
- 'playlist_count': 0,
- 'skip': 'This playlist is private',
- }, {
- 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
- 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
- 'info_dict': {
- 'title': '29C3: Not my department',
- 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
- 'uploader': 'Christiaan008',
- 'uploader_id': 'ChRiStIaAn008',
- },
- 'playlist_count': 95,
- }, {
- 'note': 'issue #673',
- 'url': 'PLBB231211A4F62143',
- 'info_dict': {
- 'title': '[OLD]Team Fortress 2 (Class-based LP)',
- 'id': 'PLBB231211A4F62143',
- 'uploader': 'Wickydoo',
- 'uploader_id': 'Wickydoo',
- },
- 'playlist_mincount': 26,
- }, {
- 'note': 'Large playlist',
- 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
- 'info_dict': {
- 'title': 'Uploads from Cauchemar',
- 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
- 'uploader': 'Cauchemar',
- 'uploader_id': 'Cauchemar89',
- },
- 'playlist_mincount': 799,
- }, {
- 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
- 'info_dict': {
- 'title': 'YDL_safe_search',
- 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
- },
- 'playlist_count': 2,
- 'skip': 'This playlist is private',
- }, {
- 'note': 'embedded',
- 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
- 'playlist_count': 4,
- 'info_dict': {
- 'title': 'JODA15',
- 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
- 'uploader': 'milan',
- 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
- }
- }, {
- 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
- 'playlist_mincount': 485,
- 'info_dict': {
- 'title': '2018 Chinese New Singles (11/6 updated)',
- 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
- 'uploader': 'LBK',
- 'uploader_id': 'sdragonfang',
- }
- }, {
- 'note': 'Embedded SWF player',
- 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
- 'playlist_count': 4,
- 'info_dict': {
- 'title': 'JODA7',
- 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
- },
- 'skip': 'This playlist does not exist',
- }, {
- 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
- 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
- 'info_dict': {
- 'title': 'Uploads from Interstellar Movie',
- 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
- 'uploader': 'Interstellar Movie',
- 'uploader_id': 'InterstellarMovie1',
- },
- 'playlist_mincount': 21,
- }, {
- # Playlist URL that does not actually serve a playlist
- 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
- 'info_dict': {
- 'id': 'FqZTN594JQw',
- 'ext': 'webm',
- 'title': "Smiley's People 01 detective, Adventure Series, Action",
- 'uploader': 'STREEM',
- 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
- 'upload_date': '20150526',
- 'license': 'Standard YouTube License',
- 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
- 'categories': ['People & Blogs'],
- 'tags': list,
- 'view_count': int,
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- 'skip': 'This video is not available.',
- 'add_ie': [YoutubeIE.ie_key()],
- }, {
- 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
- 'info_dict': {
- 'id': 'yeWKywCrFtk',
- 'ext': 'mp4',
- 'title': 'Small Scale Baler and Braiding Rugs',
- 'uploader': 'Backus-Page House Museum',
- 'uploader_id': 'backuspagemuseum',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
- 'upload_date': '20161008',
- 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
- 'categories': ['Nonprofits & Activism'],
- 'tags': list,
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'noplaylist': True,
- 'skip_download': True,
- },
- }, {
- # https://github.com/ytdl-org/youtube-dl/issues/21844
- 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
- 'info_dict': {
- 'title': 'Data Analysis with Dr Mike Pound',
- 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
- 'uploader_id': 'Computerphile',
- 'uploader': 'Computerphile',
- },
- 'playlist_mincount': 11,
- }, {
- 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
- 'only_matching': True,
- }, {
- 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
- 'only_matching': True,
- }, {
- # music album playlist
- 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
- 'only_matching': True,
- }, {
- 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
- 'only_matching': True,
- }]
-
- def _real_initialize(self):
- self._login()
-
- def extract_videos_from_page(self, page):
- ids_in_page = []
- titles_in_page = []
-
- for item in re.findall(
- r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
- attrs = extract_attributes(item)
- video_id = attrs['data-video-id']
- video_title = unescapeHTML(attrs.get('data-title'))
- if video_title:
- video_title = video_title.strip()
- ids_in_page.append(video_id)
- titles_in_page.append(video_title)
-
- # Fallback with old _VIDEO_RE
- self.extract_videos_from_page_impl(
- self._VIDEO_RE, page, ids_in_page, titles_in_page)
-
- # Relaxed fallbacks
- self.extract_videos_from_page_impl(
- r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
- ids_in_page, titles_in_page)
- self.extract_videos_from_page_impl(
- r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
- ids_in_page, titles_in_page)
-
- return zip(ids_in_page, titles_in_page)
-
- def _extract_mix(self, playlist_id):
- # Mixes are generated from a single video;
- # the playlist id is just 'RD' + video_id
- ids = []
- last_id = playlist_id[-11:]
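- # the last 11 characters of the mix playlist id are the seed video id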
- for n in itertools.count(1):
- url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
- webpage = self._download_webpage(
- url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
- new_ids = orderedSet(re.findall(
- r'''(?xs)data-video-username=".*?".*?
- href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
- webpage))
- # Fetch new pages until all the videos repeat; it seems that
- # there are always 51 unique videos.
- new_ids = [_id for _id in new_ids if _id not in ids]
- if not new_ids:
- break
- ids.extend(new_ids)
- last_id = ids[-1]
-
- url_results = self._ids_to_results(ids)
-
- search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
- title_span = (
- search_title('playlist-title')
- or search_title('title long-title')
- or search_title('title'))
- title = clean_html(title_span)
-
- return self.playlist_result(url_results, playlist_id, title)
-
- def _extract_playlist(self, playlist_id):
- url = self._TEMPLATE_URL % playlist_id
- page = self._download_webpage(url, playlist_id)
-
- # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
- for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
- match = match.strip()
- # Check if the playlist exists or is private
- mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
- if mobj:
- reason = mobj.group('reason')
- message = 'This playlist %s' % reason
- if 'private' in reason:
- message += ', use --username or --netrc to access it'
- message += '.'
- raise ExtractorError(message, expected=True)
- elif re.match(r'[^<]*Invalid parameters[^<]*', match):
- raise ExtractorError(
- 'Invalid parameters. Maybe URL is incorrect.',
- expected=True)
- elif re.match(r'[^<]*Choose your language[^<]*', match):
- continue
- else:
- self.report_warning('Youtube gives an alert message: ' + match)
-
- playlist_title = self._html_search_regex(
- r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
- page, 'title', default=None)
-
- _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
- uploader = self._html_search_regex(
- r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
- page, 'uploader', default=None)
- mobj = re.search(
- r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
- page)
- if mobj:
- uploader_id = mobj.group('uploader_id')
- uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
- else:
- uploader_id = uploader_url = None
-
- has_videos = True
-
- if not playlist_title:
- try:
- # Some playlist URLs don't actually serve a playlist (e.g.
- # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
- next(self._entries(page, playlist_id))
- except StopIteration:
- has_videos = False
-
- playlist = self.playlist_result(
- self._entries(page, playlist_id), playlist_id, playlist_title)
- playlist.update({
- 'uploader': uploader,
- 'uploader_id': uploader_id,
- 'uploader_url': uploader_url,
- })
-
- return has_videos, playlist
-
- def _check_download_just_video(self, url, playlist_id):
- # Check if it's a video-specific URL
- query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
- video_id = query_dict.get('v', [None])[0] or self._search_regex(
- r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
- 'video id', default=None)
- if video_id:
- if self._downloader.params.get('noplaylist'):
- self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
- return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
- else:
- self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
- return video_id, None
- return None, None
-
- def _real_extract(self, url):
- # Extract playlist id
- mobj = re.match(self._VALID_URL, url)
- if mobj is None:
- raise ExtractorError('Invalid URL: %s' % url)
- playlist_id = mobj.group(1) or mobj.group(2)
-
- video_id, video = self._check_download_just_video(url, playlist_id)
- if video:
- return video
-
- if playlist_id.startswith(('RD', 'UL', 'PU')):
- # Mixes require a custom extraction process
- return self._extract_mix(playlist_id)
-
- has_videos, playlist = self._extract_playlist(playlist_id)
- if has_videos or not video_id:
- return playlist
-
- # Some playlist URLs don't actually serve a playlist (see
- # https://github.com/ytdl-org/youtube-dl/issues/10537).
- # Fallback to plain video extraction if there is a video id
- # along with playlist id.
- return self.url_result(video_id, 'Youtube', video_id=video_id)
-
-
-class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
- IE_DESC = 'YouTube.com channels'
- _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
- _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
- _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
- IE_NAME = 'youtube:channel'
- _TESTS = [{
- 'note': 'paginated channel',
- 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
- 'playlist_mincount': 91,
- 'info_dict': {
- 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
- 'title': 'Uploads from lex will',
- 'uploader': 'lex will',
- 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
- }
- }, {
- 'note': 'Age restricted channel',
- # from https://www.youtube.com/user/DeusExOfficial
- 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
- 'playlist_mincount': 64,
- 'info_dict': {
- 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
- 'title': 'Uploads from Deus Ex',
- 'uploader': 'Deus Ex',
- 'uploader_id': 'DeusExOfficial',
- },
- }, {
- 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
- else super(YoutubeChannelIE, cls).suitable(url))
-
- def _build_template_url(self, url, channel_id):
- return self._TEMPLATE_URL % channel_id
-
- def _real_extract(self, url):
- channel_id = self._match_id(url)
-
- url = self._build_template_url(url, channel_id)
-
- # Channel by page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778)
- # Work around this by extracting as a playlist if we manage to obtain the channel playlist URL;
- # otherwise fall back on page-by-page channel extraction
- channel_page = self._download_webpage(
- url + '?view=57', channel_id,
- 'Downloading channel page', fatal=False)
- if channel_page is False:
- channel_playlist_id = False
- else:
- channel_playlist_id = self._html_search_meta(
- 'channelId', channel_page, 'channel id', default=None)
- if not channel_playlist_id:
- channel_url = self._html_search_meta(
- ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
- channel_page, 'channel url', default=None)
- if channel_url:
- channel_playlist_id = self._search_regex(
- r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
- channel_url, 'channel id', default=None)
- if channel_playlist_id and channel_playlist_id.startswith('UC'):
- playlist_id = 'UU' + channel_playlist_id[2:]
- return self.url_result(
- compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
-
- channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
- autogenerated = re.search(r'''(?x)
- class="[^"]*?(?:
- channel-header-autogenerated-label|
- yt-channel-title-autogenerated
- )[^"]*"''', channel_page) is not None
-
- if autogenerated:
- # The videos are contained in a single page;
- # the ajax pages can't be used, they are empty
- entries = [
- self.url_result(
- video_id, 'Youtube', video_id=video_id,
- video_title=video_title)
- for video_id, video_title in self.extract_videos_from_page(channel_page)]
- return self.playlist_result(entries, channel_id)
-
- try:
- next(self._entries(channel_page, channel_id))
- except StopIteration:
- alert_message = self._html_search_regex(
- r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
- channel_page, 'alert', default=None, group='alert')
- if alert_message:
- raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
-
- return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
-
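
The UC-to-UU rewrite above is what lets a channel be fetched through the playlist code path (and dodge the 35-page listing cap): every channel id beginning with 'UC' has an auto-generated uploads playlist whose id swaps that prefix for 'UU'. A two-line sketch of the mapping, with a hypothetical helper name:

def uploads_playlist_id(channel_id):
    # 'UCxxxx' channels expose their uploads as playlist 'UUxxxx'.
    return 'UU' + channel_id[2:] if channel_id.startswith('UC') else None


assert uploads_playlist_id('UCKfVa3S1e4PHvxWcwyMMg8w') == 'UUKfVa3S1e4PHvxWcwyMMg8w'
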
-
-class YoutubeUserIE(YoutubeChannelIE):
- IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
- _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
- IE_NAME = 'youtube:user'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
- 'playlist_mincount': 320,
- 'info_dict': {
- 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
- 'title': 'Uploads from The Linux Foundation',
- 'uploader': 'The Linux Foundation',
- 'uploader_id': 'TheLinuxFoundation',
- }
- }, {
- # Only available via https://www.youtube.com/c/12minuteathlete/videos
- # but not https://www.youtube.com/user/12minuteathlete/videos
- 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
- 'playlist_mincount': 249,
- 'info_dict': {
- 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
- 'title': 'Uploads from 12 Minute Athlete',
- 'uploader': '12 Minute Athlete',
- 'uploader_id': 'the12minuteathlete',
- }
- }, {
- 'url': 'ytuser:phihag',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/c/gametrailers',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/gametrailers',
- 'only_matching': True,
- }, {
- # This channel is not available, geo restricted to JP
- 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- # Don't return True if the url can be extracted by another youtube
- # extractor: the regex is too permissive and it would match.
- other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
- if any(ie.suitable(url) for ie in other_yt_ies):
- return False
- else:
- return super(YoutubeUserIE, cls).suitable(url)
-
- def _build_template_url(self, url, channel_id):
- mobj = re.match(self._VALID_URL, url)
- return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
-
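
The suitable() override above relies on a module-wide scan: since this extractor's _VALID_URL is deliberately loose, it first lets every sibling Youtube*IE claim the URL. A self-contained sketch of the same deferral pattern; Base, SpecificIE and BroadIE are toy classes invented for this example.

import re


class Base(object):
    _VALID_URL = None

    @classmethod
    def suitable(cls, url):
        return re.match(cls._VALID_URL, url) is not None


class SpecificIE(Base):
    _VALID_URL = r'https?://example\.com/live/\w+'


class BroadIE(Base):
    _VALID_URL = r'https?://example\.com/\w+'

    @classmethod
    def suitable(cls, url):
        # Let every other *IE class in this module claim the URL first.
        others = (klass for name, klass in globals().items()
                  if name.endswith('IE') and klass is not cls)
        if any(ie.suitable(url) for ie in others):
            return False
        return super(BroadIE, cls).suitable(url)


assert not BroadIE.suitable('https://example.com/live/abc')  # SpecificIE wins
assert BroadIE.suitable('https://example.com/watch1')
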
-
-class YoutubeLiveIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com live streams'
- _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
- IE_NAME = 'youtube:live'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
- 'info_dict': {
- 'id': 'a48o2S1cPoo',
- 'ext': 'mp4',
- 'title': 'The Young Turks - Live Main Show',
- 'uploader': 'The Young Turks',
- 'uploader_id': 'TheYoungTurks',
- 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
- 'upload_date': '20150715',
- 'license': 'Standard YouTube License',
- 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
- 'categories': ['News & Politics'],
- 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
- 'like_count': int,
- 'dislike_count': int,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/TheYoungTurks/live',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- channel_id = mobj.group('id')
- base_url = mobj.group('base_url')
- webpage = self._download_webpage(url, channel_id, fatal=False)
- if webpage:
- page_type = self._og_search_property(
- 'type', webpage, 'page type', default='')
- video_id = self._html_search_meta(
- 'videoId', webpage, 'video id', default=None)
- if page_type.startswith('video') and video_id and re.match(
- r'^[0-9A-Za-z_-]{11}$', video_id):
- return self.url_result(video_id, YoutubeIE.ie_key())
- return self.url_result(base_url)
-
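
The live resolver above reduces to one decision: if the page advertises itself as a video via og:type and carries a well-formed 11-character id, hand off to the single-video extractor; otherwise retry with the channel base URL. A sketch of just that branch, with resolve_live as a hypothetical helper:

import re


def resolve_live(page_type, video_id, base_url):
    if page_type.startswith('video') and video_id and re.match(
            r'^[0-9A-Za-z_-]{11}$', video_id):
        return 'video', video_id
    return 'channel', base_url


assert resolve_live('video.other', 'a48o2S1cPoo', '') == ('video', 'a48o2S1cPoo')
assert resolve_live('profile', None, 'https://www.youtube.com/user/TheYoungTurks') == (
    'channel', 'https://www.youtube.com/user/TheYoungTurks')
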
-
-class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
- IE_DESC = 'YouTube.com user/channel playlists'
- _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel)/(?P<id>[^/]+)/playlists'
- IE_NAME = 'youtube:playlists'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
- 'playlist_mincount': 4,
- 'info_dict': {
- 'id': 'ThirstForScience',
- 'title': 'ThirstForScience',
- },
- }, {
- # with "Load more" button
- 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
- 'playlist_mincount': 70,
- 'info_dict': {
- 'id': 'igorkle1',
- 'title': 'Игорь Клейнер',
- },
- }, {
- 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
- 'playlist_mincount': 17,
- 'info_dict': {
- 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
- 'title': 'Chem Player',
- },
- 'skip': 'Blocked',
- }]
-
-
-class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
- _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
-
-
-class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
- IE_DESC = 'YouTube.com searches'
- # there doesn't appear to be a real limit, for example if you search for
- # 'python' you get more than 8,000,000 results
- _MAX_RESULTS = float('inf')
- IE_NAME = 'youtube:search'
- _SEARCH_KEY = 'ytsearch'
- _EXTRA_QUERY_ARGS = {}
- _TESTS = []
-
- def _get_n_results(self, query, n):
- """Get a specified number of results for a query"""
-
- videos = []
- limit = n
-
- url_query = {
- 'search_query': query.encode('utf-8'),
- }
- url_query.update(self._EXTRA_QUERY_ARGS)
- result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
-
- for pagenum in itertools.count(1):
- data = self._download_json(
- result_url, video_id='query "%s"' % query,
- note='Downloading page %s' % pagenum,
- errnote='Unable to download API page',
- query={'spf': 'navigate'})
- html_content = data[1]['body']['content']
-
- if 'class="search-message' in html_content:
- raise ExtractorError(
- '[youtube] No video results', expected=True)
-
- new_videos = list(self._process_page(html_content))
- videos += new_videos
- if not new_videos or len(videos) > limit:
- break
- next_link = self._html_search_regex(
- r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
- html_content, 'next link', default=None)
- if next_link is None:
- break
- result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
-
- if len(videos) > n:
- videos = videos[:n]
- return self.playlist_result(videos, query)
-
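
_get_n_results above is a bounded pagination loop: accumulate pages until one comes back empty, the 'Next' link disappears, or the requested count is exceeded, then truncate to exactly n. A stripped-down sketch of the loop shape, where fetch_page and fake_fetch are hypothetical stand-ins for the spf=navigate JSON request:

import itertools


def collect_results(fetch_page, n):
    videos, page_token = [], None
    for pagenum in itertools.count(1):
        new_videos, page_token = fetch_page(page_token)
        videos += new_videos
        # Same stop conditions as above: empty page, enough results, no next link.
        if not new_videos or len(videos) > n or page_token is None:
            break
    return videos[:n]


def fake_fetch(token):  # three pages of two results each
    page = token or 0
    return [page * 2, page * 2 + 1], (page + 1 if page < 2 else None)


assert collect_results(fake_fetch, 3) == [0, 1, 2]
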
-
-class YoutubeSearchDateIE(YoutubeSearchIE):
- IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
- _SEARCH_KEY = 'ytsearchdate'
- IE_DESC = 'YouTube.com searches, newest videos first'
- _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
-
-
-class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
- IE_DESC = 'YouTube.com search URLs'
- IE_NAME = 'youtube:search_url'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
- _TESTS = [{
- 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
- 'playlist_mincount': 5,
- 'info_dict': {
- 'title': 'youtube-dl test video',
- }
- }, {
- 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- query = compat_urllib_parse_unquote_plus(mobj.group('query'))
- webpage = self._download_webpage(url, query)
- return self.playlist_result(self._process_page(webpage), playlist_title=query)
-
-
-class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
- IE_DESC = 'YouTube.com (multi-season) shows'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
- IE_NAME = 'youtube:show'
- _TESTS = [{
- 'url': 'https://www.youtube.com/show/airdisasters',
- 'playlist_mincount': 5,
- 'info_dict': {
- 'id': 'airdisasters',
- 'title': 'Air Disasters',
- }
- }]
-
- def _real_extract(self, url):
- playlist_id = self._match_id(url)
- return super(YoutubeShowIE, self)._real_extract(
- 'https://www.youtube.com/show/%s/playlists' % playlist_id)
-
-
-class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
- """
- Base class for feed extractors
- Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
- """
- _LOGIN_REQUIRED = True
-
- @property
- def IE_NAME(self):
- return 'youtube:%s' % self._FEED_NAME
-
- def _real_initialize(self):
- self._login()
-
- def _entries(self, page):
- # The extraction process is the same as for playlists, but the regex
- # for the video ids doesn't contain an index
- ids = []
- more_widget_html = content_html = page
- for page_num in itertools.count(1):
- matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
-
- # the 'recommended' feed has an infinite 'load more' and each new portion serves
- # the same videos in a (sometimes) slightly different order, so we check
- # for uniqueness and break when a portion has no new videos
- new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
- if not new_ids:
- break
-
- ids.extend(new_ids)
-
- for entry in self._ids_to_results(new_ids):
- yield entry
-
- mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
- if not mobj:
- break
-
- more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
- 'Downloading page #%s' % page_num,
- transform_source=uppercase_escape)
- content_html = more['content_html']
- more_widget_html = more['load_more_widget_html']
-
- def _real_extract(self, url):
- page = self._download_webpage(
- 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
- self._PLAYLIST_TITLE)
- return self.playlist_result(
- self._entries(page), playlist_title=self._PLAYLIST_TITLE)
-
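
The de-duplication in _entries above matters because the 'recommended' feed re-serves overlapping batches; extraction stops at the first batch that adds nothing new, keeping first-seen order (the job orderedSet does in the real code). A sketch of just that filter:

def dedup_batches(batches):
    seen = []
    for batch in batches:
        new_ids = [v for v in batch if v not in seen]
        if not new_ids:  # a batch with nothing new ends the feed walk
            break
        seen.extend(new_ids)
        for video_id in new_ids:
            yield video_id


assert list(dedup_batches([['a', 'b'], ['b', 'c'], ['c', 'b']])) == ['a', 'b', 'c']
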
-
-class YoutubeWatchLaterIE(YoutubePlaylistIE):
- IE_NAME = 'youtube:watchlater'
- IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/playlist?list=WL',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- _, video = self._check_download_just_video(url, 'WL')
- if video:
- return video
- _, playlist = self._extract_playlist('WL')
- return playlist
-
-
-class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
- IE_NAME = 'youtube:favorites'
- IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
- _LOGIN_REQUIRED = True
-
- def _real_extract(self, url):
- webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
- playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
- return self.url_result(playlist_id, 'YoutubePlaylist')
-
-
-class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
- _FEED_NAME = 'recommended'
- _PLAYLIST_TITLE = 'Youtube Recommended videos'
-
-
-class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
- _FEED_NAME = 'subscriptions'
- _PLAYLIST_TITLE = 'Youtube Subscriptions'
-
-
-class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
- _FEED_NAME = 'history'
- _PLAYLIST_TITLE = 'Youtube History'
-
-
-class YoutubeTruncatedURLIE(InfoExtractor):
- IE_NAME = 'youtube:truncated_url'
- IE_DESC = False # Do not list
- _VALID_URL = r'''(?x)
- (?:https?://)?
- (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
- (?:watch\?(?:
- feature=[a-z_]+|
- annotation_id=annotation_[^&]+|
- x-yt-cl=[0-9]+|
- hl=[^&]*|
- t=[0-9]+
- )?
- |
- attribution_link\?a=[^&]+
- )
- $
- '''
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?feature=foo',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?hl=en-GB',
- 'only_matching': True,
- }, {
- 'url': 'https://www.youtube.com/watch?t=2372',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- raise ExtractorError(
- 'Did you forget to quote the URL? Remember that & is a meta '
- 'character in most shells, so you want to put the URL in quotes, '
- 'like youtube-dl '
- '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
- 'or simply youtube-dl BaW_jenozKc .',
- expected=True)
-
-
-class YoutubeTruncatedIDIE(InfoExtractor):
- IE_NAME = 'youtube:truncated_id'
- IE_DESC = False # Do not list
- _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
-
- _TESTS = [{
- 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
- 'only_matching': True,
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
- raise ExtractorError(
- 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
- expected=True)
diff --git a/youtube_dl/extractor/zapiks.py b/youtube_dl/extractor/zapiks.py
deleted file mode 100644
index bacb82eee..000000000
--- a/youtube_dl/extractor/zapiks.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..utils import (
- parse_duration,
- parse_iso8601,
- xpath_with_ns,
- xpath_text,
- int_or_none,
-)
-
-
-class ZapiksIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
- _TESTS = [
- {
- 'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
- 'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
- 'info_dict': {
- 'id': '80798',
- 'ext': 'mp4',
- 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
- 'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 528,
- 'timestamp': 1359044972,
- 'upload_date': '20130124',
- 'view_count': int,
- 'comment_count': int,
- },
- },
- {
- 'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
- 'only_matching': True,
- },
- {
- 'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
- 'only_matching': True,
- },
- {
- 'url': 'http://www.zapiks.fr/index.php?action=playerIframe&amp;media_id=118046&amp;width=640&amp;height=360&amp;autoStart=false&amp;language=fr',
- 'only_matching': True,
- },
- ]
-
- def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id')
- display_id = mobj.group('display_id') or video_id
-
- webpage = self._download_webpage(url, display_id)
-
- if not video_id:
- video_id = self._search_regex(
- r'data-media-id="(\d+)"', webpage, 'video id')
-
- playlist = self._download_xml(
- 'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
- display_id)
-
- NS_MAP = {
- 'jwplayer': 'http://rss.jwpcdn.com/'
- }
-
- def ns(path):
- return xpath_with_ns(path, NS_MAP)
-
- item = playlist.find('./channel/item')
-
- title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
- description = self._og_search_description(webpage, default=None)
- thumbnail = xpath_text(
- item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
- duration = parse_duration(self._html_search_meta(
- 'duration', webpage, 'duration', default=None))
- timestamp = parse_iso8601(self._html_search_meta(
- 'uploadDate', webpage, 'upload date', default=None), ' ')
-
- view_count = int_or_none(self._search_regex(
- r'UserPlays:(\d+)', webpage, 'view count', default=None))
- comment_count = int_or_none(self._search_regex(
- r'UserComments:(\d+)', webpage, 'comment count', default=None))
-
- formats = []
- for source in item.findall(ns('./jwplayer:source')):
- format_id = source.attrib['label']
- f = {
- 'url': source.attrib['file'],
- 'format_id': format_id,
- }
- m = re.search(r'^(?P<height>\d+)[pP]', format_id)
- if m:
- f['height'] = int(m.group('height'))
- formats.append(f)
- self._sort_formats(formats)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': description,
- 'thumbnail': thumbnail,
- 'duration': duration,
- 'timestamp': timestamp,
- 'view_count': view_count,
- 'comment_count': comment_count,
- 'formats': formats,
- }
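
For reference, the namespaced lookups above (ns('./jwplayer:source') and friends) expand the jwplayer: prefix to the http://rss.jwpcdn.com/ namespace, which is all xpath_with_ns does. A standard-library sketch against a toy playlist document, including the height parse from the label:

import re
import xml.etree.ElementTree as ET

# Toy playlist in the shape the extractor above consumes.
PLAYLIST = '''<rss xmlns:jwplayer="http://rss.jwpcdn.com/"><channel><item>
<title>Demo</title>
<jwplayer:source file="http://example.com/v.mp4" label="720p"/>
</item></channel></rss>'''

JW = '{http://rss.jwpcdn.com/}'  # what ns()/xpath_with_ns expands 'jwplayer:' to
item = ET.fromstring(PLAYLIST).find('./channel/item')
source = item.find('./%ssource' % JW)
height = re.search(r'^(?P<height>\d+)[pP]', source.attrib['label'])
assert source.attrib['file'] == 'http://example.com/v.mp4'
assert int(height.group('height')) == 720
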
diff --git a/youtube_dl/extractor/zdf.py b/youtube_dl/extractor/zdf.py
deleted file mode 100644
index 145c123a4..000000000
--- a/youtube_dl/extractor/zdf.py
+++ /dev/null
@@ -1,320 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-from ..compat import compat_str
-from ..utils import (
- determine_ext,
- int_or_none,
- NO_DEFAULT,
- orderedSet,
- parse_codecs,
- qualities,
- try_get,
- unified_timestamp,
- update_url_query,
- url_or_none,
- urljoin,
-)
-
-
-class ZDFBaseIE(InfoExtractor):
- def _call_api(self, url, player, referrer, video_id, item):
- return self._download_json(
- url, video_id, 'Downloading JSON %s' % item,
- headers={
- 'Referer': referrer,
- 'Api-Auth': 'Bearer %s' % player['apiToken'],
- })
-
- def _extract_player(self, webpage, video_id, fatal=True):
- return self._parse_json(
- self._search_regex(
- r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
- 'player JSON', default='{}' if not fatal else NO_DEFAULT,
- group='json'),
- video_id)
-
-
-class ZDFIE(ZDFBaseIE):
- _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
- _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
- _GEO_COUNTRIES = ['DE']
-
- _TESTS = [{
- 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
- 'info_dict': {
- 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
- 'ext': 'mp4',
- 'title': 'Die Magie der Farben (2/2)',
- 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
- 'duration': 2615,
- 'timestamp': 1465021200,
- 'upload_date': '20160604',
- },
- }, {
- 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
- 'only_matching': True,
- }, {
- 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
- 'only_matching': True,
- }]
-
- @staticmethod
- def _extract_subtitles(src):
- subtitles = {}
- for caption in try_get(src, lambda x: x['captions'], list) or []:
- subtitle_url = url_or_none(caption.get('uri'))
- if subtitle_url:
- lang = caption.get('language', 'deu')
- subtitles.setdefault(lang, []).append({
- 'url': subtitle_url,
- })
- return subtitles
-
- def _extract_format(self, video_id, formats, format_urls, meta):
- format_url = url_or_none(meta.get('url'))
- if not format_url:
- return
- if format_url in format_urls:
- return
- format_urls.add(format_url)
- mime_type = meta.get('mimeType')
- ext = determine_ext(format_url)
- if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
- format_url, video_id, 'mp4', m3u8_id='hls',
- entry_protocol='m3u8_native', fatal=False))
- elif mime_type == 'application/f4m+xml' or ext == 'f4m':
- formats.extend(self._extract_f4m_formats(
- update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
- else:
- f = parse_codecs(meta.get('mimeCodec'))
- format_id = ['http']
- for p in (meta.get('type'), meta.get('quality')):
- if p and isinstance(p, compat_str):
- format_id.append(p)
- f.update({
- 'url': format_url,
- 'format_id': '-'.join(format_id),
- 'format_note': meta.get('quality'),
- 'language': meta.get('language'),
- 'quality': qualities(self._QUALITIES)(meta.get('quality')),
- 'preference': -10,
- })
- formats.append(f)
-
- def _extract_entry(self, url, player, content, video_id):
- title = content.get('title') or content['teaserHeadline']
-
- t = content['mainVideoContent']['http://zdf.de/rels/target']
-
- ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
-
- if not ptmd_path:
- ptmd_path = t[
- 'http://zdf.de/rels/streams/ptmd-template'].replace(
- '{playerId}', 'portal')
-
- ptmd = self._call_api(
- urljoin(url, ptmd_path), player, url, video_id, 'metadata')
-
- formats = []
- track_uris = set()
- for p in ptmd['priorityList']:
- formitaeten = p.get('formitaeten')
- if not isinstance(formitaeten, list):
- continue
- for f in formitaeten:
- f_qualities = f.get('qualities')
- if not isinstance(f_qualities, list):
- continue
- for quality in f_qualities:
- tracks = try_get(quality, lambda x: x['audio']['tracks'], list)
- if not tracks:
- continue
- for track in tracks:
- self._extract_format(
- video_id, formats, track_uris, {
- 'url': track.get('uri'),
- 'type': f.get('type'),
- 'mimeType': f.get('mimeType'),
- 'quality': quality.get('quality'),
- 'language': track.get('language'),
- })
- self._sort_formats(formats)
-
- thumbnails = []
- layouts = try_get(
- content, lambda x: x['teaserImageRef']['layouts'], dict)
- if layouts:
- for layout_key, layout_url in layouts.items():
- layout_url = url_or_none(layout_url)
- if not layout_url:
- continue
- thumbnail = {
- 'url': layout_url,
- 'format_id': layout_key,
- }
- mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
- if mobj:
- thumbnail.update({
- 'width': int(mobj.group('width')),
- 'height': int(mobj.group('height')),
- })
- thumbnails.append(thumbnail)
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': content.get('leadParagraph') or content.get('teasertext'),
- 'duration': int_or_none(t.get('duration')),
- 'timestamp': unified_timestamp(content.get('editorialDate')),
- 'thumbnails': thumbnails,
- 'subtitles': self._extract_subtitles(ptmd),
- 'formats': formats,
- }
-
- def _extract_regular(self, url, player, video_id):
- content = self._call_api(
- player['content'], player, url, video_id, 'content')
- return self._extract_entry(player['content'], player, content, video_id)
-
- def _extract_mobile(self, video_id):
- document = self._download_json(
- 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
- video_id)['document']
-
- title = document['titel']
-
- formats = []
- format_urls = set()
- for f in document['formitaeten']:
- self._extract_format(video_id, formats, format_urls, f)
- self._sort_formats(formats)
-
- thumbnails = []
- teaser_bild = document.get('teaserBild')
- if isinstance(teaser_bild, dict):
- for thumbnail_key, thumbnail in teaser_bild.items():
- thumbnail_url = try_get(
- thumbnail, lambda x: x['url'], compat_str)
- if thumbnail_url:
- thumbnails.append({
- 'url': thumbnail_url,
- 'id': thumbnail_key,
- 'width': int_or_none(thumbnail.get('width')),
- 'height': int_or_none(thumbnail.get('height')),
- })
-
- return {
- 'id': video_id,
- 'title': title,
- 'description': document.get('beschreibung'),
- 'duration': int_or_none(document.get('length')),
- 'timestamp': unified_timestamp(try_get(
- document, lambda x: x['meta']['editorialDate'], compat_str)),
- 'thumbnails': thumbnails,
- 'subtitles': self._extract_subtitles(document),
- 'formats': formats,
- }
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id, fatal=False)
- if webpage:
- player = self._extract_player(webpage, url, fatal=False)
- if player:
- return self._extract_regular(url, player, video_id)
-
- return self._extract_mobile(video_id)
-
-
-class ZDFChannelIE(ZDFBaseIE):
- _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
- _TESTS = [{
- 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
- 'info_dict': {
- 'id': 'das-aktuelle-sportstudio',
- 'title': 'das aktuelle sportstudio | ZDF',
- },
- 'playlist_count': 21,
- }, {
- 'url': 'https://www.zdf.de/dokumentation/planet-e',
- 'info_dict': {
- 'id': 'planet-e',
- 'title': 'planet e.',
- },
- 'playlist_count': 4,
- }, {
- 'url': 'https://www.zdf.de/filme/taunuskrimi/',
- 'only_matching': True,
- }]
-
- @classmethod
- def suitable(cls, url):
- return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
-
- def _real_extract(self, url):
- channel_id = self._match_id(url)
-
- webpage = self._download_webpage(url, channel_id)
-
- entries = [
- self.url_result(item_url, ie=ZDFIE.ie_key())
- for item_url in orderedSet(re.findall(
- r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
-
- return self.playlist_result(
- entries, channel_id, self._og_search_title(webpage, fatal=False))
-
- r"""
- player = self._extract_player(webpage, channel_id)
-
- channel_id = self._search_regex(
- r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
- 'channel id', group='id')
-
- channel = self._call_api(
- 'https://api.zdf.de/content/documents/%s.json' % channel_id,
- player, url, channel_id)
-
- items = []
- for module in channel['module']:
- for teaser in try_get(module, lambda x: x['teaser'], list) or []:
- t = try_get(
- teaser, lambda x: x['http://zdf.de/rels/target'], dict)
- if not t:
- continue
- items.extend(try_get(
- t,
- lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
- list) or [])
- items.extend(try_get(
- module,
- lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
- list) or [])
-
- entries = []
- entry_urls = set()
- for item in items:
- t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
- if not t:
- continue
- sharing_url = t.get('http://zdf.de/rels/sharing-url')
- if not sharing_url or not isinstance(sharing_url, compat_str):
- continue
- if sharing_url in entry_urls:
- continue
- entry_urls.add(sharing_url)
- entries.append(self.url_result(
- sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
-
- return self.playlist_result(entries, channel_id, channel.get('title'))
- """
diff --git a/youtube_dl/extractor/zype.py b/youtube_dl/extractor/zype.py
deleted file mode 100644
index 3b16e703b..000000000
--- a/youtube_dl/extractor/zype.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
-from .common import InfoExtractor
-
-
-class ZypeIE(InfoExtractor):
- _VALID_URL = r'https?://player\.zype\.com/embed/(?P<id>[\da-fA-F]+)\.js\?.*?api_key=[^&]+'
- _TEST = {
- 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
- 'md5': 'eaee31d474c76a955bdaba02a505c595',
- 'info_dict': {
- 'id': '5b400b834b32992a310622b9',
- 'ext': 'mp4',
- 'title': 'Smoky Barbecue Favorites',
- 'thumbnail': r're:^https?://.*\.jpe?g',
- },
- }
-
- @staticmethod
- def _extract_urls(webpage):
- return [
- mobj.group('url')
- for mobj in re.finditer(
- r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1',
- webpage)]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- webpage = self._download_webpage(url, video_id)
-
- title = self._search_regex(
- r'video_title\s*[:=]\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
- 'title', group='value')
-
- m3u8_url = self._search_regex(
- r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', webpage,
- 'm3u8 url', group='url')
-
- formats = self._extract_m3u8_formats(
- m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
- m3u8_id='hls')
- self._sort_formats(formats)
-
- thumbnail = self._search_regex(
- r'poster\s*[:=]\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'thumbnail',
- default=False, group='url')
-
- return {
- 'id': video_id,
- 'title': title,
- 'thumbnail': thumbnail,
- 'formats': formats,
- }
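
_extract_urls above is the hook the generic extractor uses to find embedded Zype players; it is a plain finditer over script tags. The same scan run against a toy page, with a shortened api_key value for illustration:

import re

PAGE = ('<script src="https://player.zype.com/embed/'
        '5b400b834b32992a310622b9.js?api_key=KEY"></script>')

urls = [
    mobj.group('url')
    for mobj in re.finditer(
        r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//player\.zype\.com/embed/[\da-fA-F]+\.js\?.*?api_key=.+?)\1',
        PAGE)]
assert urls == ['https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=KEY']
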
diff --git a/youtube_dl/options.py b/youtube_dl/options.py
deleted file mode 100644
index 1ffabc62b..000000000
--- a/youtube_dl/options.py
+++ /dev/null
@@ -1,916 +0,0 @@
-from __future__ import unicode_literals
-
-import os.path
-import optparse
-import re
-import sys
-
-from .downloader.external import list_external_downloaders
-from .compat import (
- compat_expanduser,
- compat_get_terminal_size,
- compat_getenv,
- compat_kwargs,
- compat_shlex_split,
-)
-from .utils import (
- preferredencoding,
- write_string,
-)
-from .version import __version__
-
-
-def _hide_login_info(opts):
- PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
- eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
-
- def _scrub_eq(o):
- m = eqre.match(o)
- if m:
- return m.group('key') + '=PRIVATE'
- else:
- return o
-
- opts = list(map(_scrub_eq, opts))
- for idx, opt in enumerate(opts):
- if opt in PRIVATE_OPTS and idx + 1 < len(opts):
- opts[idx + 1] = 'PRIVATE'
- return opts
-
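
_hide_login_info above is applied to argv before it is echoed into verbose and debug output, masking credentials in both the '--opt value' and '--opt=value' spellings. A self-contained sketch of the same masking:

import re

PRIVATE_OPTS = set(['-p', '--password', '-u', '--username',
                    '--video-password', '--ap-password', '--ap-username'])
eqre = re.compile(
    '^(?P<key>' + '|'.join(re.escape(po) for po in PRIVATE_OPTS) + ')=.+$')


def hide_login_info(opts):
    # Mask '--opt=value' spellings in place.
    opts = [eqre.sub(r'\g<key>=PRIVATE', o) for o in opts]
    # Mask the argument following a bare private option.
    for idx, opt in enumerate(opts):
        if opt in PRIVATE_OPTS and idx + 1 < len(opts):
            opts[idx + 1] = 'PRIVATE'
    return opts


assert hide_login_info(['-u', 'alice', '--password=hunter2', '-f', 'best']) \
    == ['-u', 'PRIVATE', '--password=PRIVATE', '-f', 'best']
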
-
-def parseOpts(overrideArguments=None):
- def _readOptions(filename_bytes, default=[]):
- try:
- optionf = open(filename_bytes)
- except IOError:
- return default # silently skip if file is not present
- try:
- # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
- contents = optionf.read()
- if sys.version_info < (3,):
- contents = contents.decode(preferredencoding())
- res = compat_shlex_split(contents, comments=True)
- finally:
- optionf.close()
- return res
-
- def _readUserConf():
- xdg_config_home = compat_getenv('XDG_CONFIG_HOME')
- if xdg_config_home:
- userConfFile = os.path.join(xdg_config_home, 'youtube-dl', 'config')
- if not os.path.isfile(userConfFile):
- userConfFile = os.path.join(xdg_config_home, 'youtube-dl.conf')
- else:
- userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl', 'config')
- if not os.path.isfile(userConfFile):
- userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dl.conf')
- userConf = _readOptions(userConfFile, None)
-
- if userConf is None:
- appdata_dir = compat_getenv('appdata')
- if appdata_dir:
- userConf = _readOptions(
- os.path.join(appdata_dir, 'youtube-dl', 'config'),
- default=None)
- if userConf is None:
- userConf = _readOptions(
- os.path.join(appdata_dir, 'youtube-dl', 'config.txt'),
- default=None)
-
- if userConf is None:
- userConf = _readOptions(
- os.path.join(compat_expanduser('~'), 'youtube-dl.conf'),
- default=None)
- if userConf is None:
- userConf = _readOptions(
- os.path.join(compat_expanduser('~'), 'youtube-dl.conf.txt'),
- default=None)
-
- if userConf is None:
- userConf = []
-
- return userConf
-
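
_readUserConf above probes a fixed list of locations in priority order: the XDG config directory, then Windows %APPDATA%, then dotfiles in the home directory, returning the contents of the first file that exists. A flattened sketch of that candidate order; user_conf_candidates is a hypothetical helper and simplifies the nested fallbacks slightly:

import os


def user_conf_candidates(environ):
    xdg = environ.get('XDG_CONFIG_HOME') or os.path.join(
        environ.get('HOME', ''), '.config')
    candidates = [
        os.path.join(xdg, 'youtube-dl', 'config'),
        os.path.join(xdg, 'youtube-dl.conf'),
    ]
    appdata = environ.get('appdata')
    if appdata:  # Windows fallback
        candidates += [
            os.path.join(appdata, 'youtube-dl', 'config'),
            os.path.join(appdata, 'youtube-dl', 'config.txt'),
        ]
    home = environ.get('HOME', '')
    candidates += [
        os.path.join(home, 'youtube-dl.conf'),
        os.path.join(home, 'youtube-dl.conf.txt'),
    ]
    return candidates


assert user_conf_candidates({'HOME': '/home/u'})[0] == '/home/u/.config/youtube-dl/config'
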
- def _format_option_string(option):
- ''' ('-o', '--option') -> -o, --format METAVAR'''
-
- opts = []
-
- if option._short_opts:
- opts.append(option._short_opts[0])
- if option._long_opts:
- opts.append(option._long_opts[0])
- if len(opts) > 1:
- opts.insert(1, ', ')
-
- if option.takes_value():
- opts.append(' %s' % option.metavar)
-
- return ''.join(opts)
-
- def _comma_separated_values_options_callback(option, opt_str, value, parser):
- setattr(parser.values, option.dest, value.split(','))
-
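
The callback above is how list-valued flags such as --sub-lang are parsed: optparse hands the raw string to the callback, which splits on commas and stores the list. A runnable miniature of the same wiring:

import optparse


def csv_callback(option, opt_str, value, parser):
    setattr(parser.values, option.dest, value.split(','))


parser = optparse.OptionParser()
parser.add_option('--sub-lang', dest='subtitleslangs', type='str',
                  action='callback', callback=csv_callback, default=[])
opts, _ = parser.parse_args(['--sub-lang', 'en,de,fr'])
assert opts.subtitleslangs == ['en', 'de', 'fr']
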
- # No need to wrap help messages if we're on a wide console
- columns = compat_get_terminal_size().columns
- max_width = columns if columns else 80
- max_help_position = 80
-
- fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
- fmt.format_option_strings = _format_option_string
-
- kw = {
- 'version': __version__,
- 'formatter': fmt,
- 'usage': '%prog [OPTIONS] URL [URL...]',
- 'conflict_handler': 'resolve',
- }
-
- parser = optparse.OptionParser(**compat_kwargs(kw))
-
- general = optparse.OptionGroup(parser, 'General Options')
- general.add_option(
- '-h', '--help',
- action='help',
- help='Print this help text and exit')
- general.add_option(
- '-v', '--version',
- action='version',
- help='Print program version and exit')
- general.add_option(
- '-U', '--update',
- action='store_true', dest='update_self',
- help='Update this program to latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
- general.add_option(
- '-i', '--ignore-errors',
- action='store_true', dest='ignoreerrors', default=False,
- help='Continue on download errors, for example to skip unavailable videos in a playlist')
- general.add_option(
- '--abort-on-error',
- action='store_false', dest='ignoreerrors',
- help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
- general.add_option(
- '--dump-user-agent',
- action='store_true', dest='dump_user_agent', default=False,
- help='Display the current browser identification')
- general.add_option(
- '--list-extractors',
- action='store_true', dest='list_extractors', default=False,
- help='List all supported extractors')
- general.add_option(
- '--extractor-descriptions',
- action='store_true', dest='list_extractor_descriptions', default=False,
- help='Output descriptions of all supported extractors')
- general.add_option(
- '--force-generic-extractor',
- action='store_true', dest='force_generic_extractor', default=False,
- help='Force extraction to use the generic extractor')
- general.add_option(
- '--default-search',
- dest='default_search', metavar='PREFIX',
- help='Use this prefix for unqualified URLs. For example "gvsearch2:" downloads two videos from google videos for youtube-dl "large apple". Use the value "auto" to let youtube-dl guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
- general.add_option(
- '--ignore-config',
- action='store_true',
- help='Do not read configuration files. '
- 'When given in the global configuration file /etc/youtube-dl.conf: '
- 'Do not read the user configuration in ~/.config/youtube-dl/config '
- '(%APPDATA%/youtube-dl/config.txt on Windows)')
- general.add_option(
- '--config-location',
- dest='config_location', metavar='PATH',
- help='Location of the configuration file; either the path to the config or its containing directory.')
- general.add_option(
- '--flat-playlist',
- action='store_const', dest='extract_flat', const='in_playlist',
- default=False,
- help='Do not extract the videos of a playlist, only list them.')
- general.add_option(
- '--mark-watched',
- action='store_true', dest='mark_watched', default=False,
- help='Mark videos watched (YouTube only)')
- general.add_option(
- '--no-mark-watched',
- action='store_false', dest='mark_watched', default=False,
- help='Do not mark videos watched (YouTube only)')
- general.add_option(
- '--no-color', '--no-colors',
- action='store_true', dest='no_color',
- default=False,
- help='Do not emit color codes in output')
-
- network = optparse.OptionGroup(parser, 'Network Options')
- network.add_option(
- '--proxy', dest='proxy',
- default=None, metavar='URL',
- help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
- 'SOCKS proxy, specify a proper scheme. For example '
- 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
- 'for direct connection')
- network.add_option(
- '--socket-timeout',
- dest='socket_timeout', type=float, default=None, metavar='SECONDS',
- help='Time to wait before giving up, in seconds')
- network.add_option(
- '--source-address',
- metavar='IP', dest='source_address', default=None,
- help='Client-side IP address to bind to',
- )
- network.add_option(
- '-4', '--force-ipv4',
- action='store_const', const='0.0.0.0', dest='source_address',
- help='Make all connections via IPv4',
- )
- network.add_option(
- '-6', '--force-ipv6',
- action='store_const', const='::', dest='source_address',
- help='Make all connections via IPv6',
- )
-
- geo = optparse.OptionGroup(parser, 'Geo Restriction')
- geo.add_option(
- '--geo-verification-proxy',
- dest='geo_verification_proxy', default=None, metavar='URL',
- help='Use this proxy to verify the IP address for some geo-restricted sites. '
- 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.')
- geo.add_option(
- '--cn-verification-proxy',
- dest='cn_verification_proxy', default=None, metavar='URL',
- help=optparse.SUPPRESS_HELP)
- geo.add_option(
- '--geo-bypass',
- action='store_true', dest='geo_bypass', default=True,
- help='Bypass geographic restriction via faking X-Forwarded-For HTTP header')
- geo.add_option(
- '--no-geo-bypass',
- action='store_false', dest='geo_bypass', default=True,
- help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
- geo.add_option(
- '--geo-bypass-country', metavar='CODE',
- dest='geo_bypass_country', default=None,
- help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-2 country code')
- geo.add_option(
- '--geo-bypass-ip-block', metavar='IP_BLOCK',
- dest='geo_bypass_ip_block', default=None,
- help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
-
- selection = optparse.OptionGroup(parser, 'Video Selection')
- selection.add_option(
- '--playlist-start',
- dest='playliststart', metavar='NUMBER', default=1, type=int,
- help='Playlist video to start at (default is %default)')
- selection.add_option(
- '--playlist-end',
- dest='playlistend', metavar='NUMBER', default=None, type=int,
- help='Playlist video to end at (default is last)')
- selection.add_option(
- '--playlist-items',
- dest='playlist_items', metavar='ITEM_SPEC', default=None,
- help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can also specify a range: "--playlist-items 1-3,7,10-13" will download the videos at indices 1, 2, 3, 7, 10, 11, 12 and 13.')
- selection.add_option(
- '--match-title',
- dest='matchtitle', metavar='REGEX',
- help='Download only matching titles (regex or caseless sub-string)')
- selection.add_option(
- '--reject-title',
- dest='rejecttitle', metavar='REGEX',
- help='Skip download for matching titles (regex or caseless sub-string)')
- selection.add_option(
- '--max-downloads',
- dest='max_downloads', metavar='NUMBER', type=int, default=None,
- help='Abort after downloading NUMBER files')
- selection.add_option(
- '--min-filesize',
- metavar='SIZE', dest='min_filesize', default=None,
- help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)')
- selection.add_option(
- '--max-filesize',
- metavar='SIZE', dest='max_filesize', default=None,
- help='Do not download any videos larger than SIZE (e.g. 50k or 44.6m)')
- selection.add_option(
- '--date',
- metavar='DATE', dest='date', default=None,
- help='Download only videos uploaded in this date')
- selection.add_option(
- '--datebefore',
- metavar='DATE', dest='datebefore', default=None,
- help='Download only videos uploaded on or before this date (i.e. inclusive)')
- selection.add_option(
- '--dateafter',
- metavar='DATE', dest='dateafter', default=None,
- help='Download only videos uploaded on or after this date (i.e. inclusive)')
- selection.add_option(
- '--min-views',
- metavar='COUNT', dest='min_views', default=None, type=int,
- help='Do not download any videos with fewer than COUNT views')
- selection.add_option(
- '--max-views',
- metavar='COUNT', dest='max_views', default=None, type=int,
- help='Do not download any videos with more than COUNT views')
- selection.add_option(
- '--match-filter',
- metavar='FILTER', dest='match_filter', default=None,
- help=(
- 'Generic video filter. '
- 'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
- 'match if the key is present, '
- '!key to check if the key is not present, '
- 'key > NUMBER (like "comment_count > 12", also works with '
- '>=, <, <=, !=, =) to compare against a number, '
- 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
- 'to match against a string literal '
- 'and & to require multiple matches. '
- 'Values which are not known are excluded unless you '
- 'put a question mark (?) after the operator. '
- 'For example, to only match videos that have been liked more than '
- '100 times and disliked less than 50 times (or the dislike '
- 'functionality is not available at the given service), but who '
- 'also have a description, use --match-filter '
- '"like_count > 100 & dislike_count <? 50 & description" .'
- ))
- selection.add_option(
- '--no-playlist',
- action='store_true', dest='noplaylist', default=False,
- help='Download only the video, if the URL refers to a video and a playlist.')
- selection.add_option(
- '--yes-playlist',
- action='store_false', dest='noplaylist', default=False,
- help='Download the playlist, if the URL refers to a video and a playlist.')
- selection.add_option(
- '--age-limit',
- metavar='YEARS', dest='age_limit', default=None, type=int,
- help='Download only videos suitable for the given age')
- selection.add_option(
- '--download-archive', metavar='FILE',
- dest='download_archive',
- help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
- selection.add_option(
- '--include-ads',
- dest='include_ads', action='store_true',
- help='Download advertisements as well (experimental)')
-
- authentication = optparse.OptionGroup(parser, 'Authentication Options')
- authentication.add_option(
- '-u', '--username',
- dest='username', metavar='USERNAME',
- help='Login with this account ID')
- authentication.add_option(
- '-p', '--password',
- dest='password', metavar='PASSWORD',
- help='Account password. If this option is left out, youtube-dl will ask interactively.')
- authentication.add_option(
- '-2', '--twofactor',
- dest='twofactor', metavar='TWOFACTOR',
- help='Two-factor authentication code')
- authentication.add_option(
- '-n', '--netrc',
- action='store_true', dest='usenetrc', default=False,
- help='Use .netrc authentication data')
- authentication.add_option(
- '--video-password',
- dest='videopassword', metavar='PASSWORD',
- help='Video password (vimeo, smotri, youku)')
-
- adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
- adobe_pass.add_option(
- '--ap-mso',
- dest='ap_mso', metavar='MSO',
- help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
- adobe_pass.add_option(
- '--ap-username',
- dest='ap_username', metavar='USERNAME',
- help='Multiple-system operator account login')
- adobe_pass.add_option(
- '--ap-password',
- dest='ap_password', metavar='PASSWORD',
- help='Multiple-system operator account password. If this option is left out, youtube-dl will ask interactively.')
- adobe_pass.add_option(
- '--ap-list-mso',
- action='store_true', dest='ap_list_mso', default=False,
- help='List all supported multiple-system operators')
-
- video_format = optparse.OptionGroup(parser, 'Video Format Options')
- video_format.add_option(
- '-f', '--format',
- action='store', dest='format', metavar='FORMAT', default=None,
- help='Video format code, see the "FORMAT SELECTION" for all the info')
- video_format.add_option(
- '--all-formats',
- action='store_const', dest='format', const='all',
- help='Download all available video formats')
- video_format.add_option(
- '--prefer-free-formats',
- action='store_true', dest='prefer_free_formats', default=False,
- help='Prefer free video formats unless a specific one is requested')
- video_format.add_option(
- '-F', '--list-formats',
- action='store_true', dest='listformats',
- help='List all available formats of requested videos')
- video_format.add_option(
- '--youtube-include-dash-manifest',
- action='store_true', dest='youtube_include_dash_manifest', default=True,
- help=optparse.SUPPRESS_HELP)
- video_format.add_option(
- '--youtube-skip-dash-manifest',
- action='store_false', dest='youtube_include_dash_manifest',
- help='Do not download the DASH manifests and related data on YouTube videos')
- video_format.add_option(
- '--merge-output-format',
- action='store', dest='merge_output_format', metavar='FORMAT', default=None,
- help=(
- 'If a merge is required (e.g. bestvideo+bestaudio), '
- 'output to given container format. One of mkv, mp4, ogg, webm, flv. '
- 'Ignored if no merge is required'))
-
- subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
- subtitles.add_option(
- '--write-sub', '--write-srt',
- action='store_true', dest='writesubtitles', default=False,
- help='Write subtitle file')
- subtitles.add_option(
- '--write-auto-sub', '--write-automatic-sub',
- action='store_true', dest='writeautomaticsub', default=False,
- help='Write automatically generated subtitle file (YouTube only)')
- subtitles.add_option(
- '--all-subs',
- action='store_true', dest='allsubtitles', default=False,
- help='Download all the available subtitles of the video')
- subtitles.add_option(
- '--list-subs',
- action='store_true', dest='listsubtitles', default=False,
- help='List all available subtitles for the video')
- subtitles.add_option(
- '--sub-format',
- action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
- help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
- subtitles.add_option(
- '--sub-lang', '--sub-langs', '--srt-lang',
- action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
- default=[], callback=_comma_separated_values_options_callback,
- help='Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags')
-
- downloader = optparse.OptionGroup(parser, 'Download Options')
- downloader.add_option(
- '-r', '--limit-rate', '--rate-limit',
- dest='ratelimit', metavar='RATE',
- help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
- downloader.add_option(
- '-R', '--retries',
- dest='retries', metavar='RETRIES', default=10,
- help='Number of retries (default is %default), or "infinite".')
- downloader.add_option(
- '--fragment-retries',
- dest='fragment_retries', metavar='RETRIES', default=10,
- help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
- downloader.add_option(
- '--skip-unavailable-fragments',
- action='store_true', dest='skip_unavailable_fragments', default=True,
- help='Skip unavailable fragments (DASH, hlsnative and ISM)')
- downloader.add_option(
- '--abort-on-unavailable-fragment',
- action='store_false', dest='skip_unavailable_fragments',
- help='Abort downloading when some fragment is not available')
- downloader.add_option(
- '--keep-fragments',
- action='store_true', dest='keep_fragments', default=False,
- help='Keep downloaded fragments on disk after downloading is finished; fragments are erased by default')
- downloader.add_option(
- '--buffer-size',
- dest='buffersize', metavar='SIZE', default='1024',
- help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
- downloader.add_option(
- '--no-resize-buffer',
- action='store_true', dest='noresizebuffer', default=False,
- help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
- downloader.add_option(
- '--http-chunk-size',
- dest='http_chunk_size', metavar='SIZE', default=None,
- help='Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
- 'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)')
- downloader.add_option(
- '--test',
- action='store_true', dest='test', default=False,
- help=optparse.SUPPRESS_HELP)
- downloader.add_option(
- '--playlist-reverse',
- action='store_true',
- help='Download playlist videos in reverse order')
- downloader.add_option(
- '--playlist-random',
- action='store_true',
- help='Download playlist videos in random order')
- downloader.add_option(
- '--xattr-set-filesize',
- dest='xattr_set_filesize', action='store_true',
- help='Set file xattribute ytdl.filesize with expected file size')
- downloader.add_option(
- '--hls-prefer-native',
- dest='hls_prefer_native', action='store_true', default=None,
- help='Use the native HLS downloader instead of ffmpeg')
- downloader.add_option(
- '--hls-prefer-ffmpeg',
- dest='hls_prefer_native', action='store_false', default=None,
- help='Use ffmpeg instead of the native HLS downloader')
- downloader.add_option(
- '--hls-use-mpegts',
- dest='hls_use_mpegts', action='store_true',
- help='Use the mpegts container for HLS videos, allowing playback of the '
- 'video while it downloads (some players may not be able to play it)')
- downloader.add_option(
- '--external-downloader',
- dest='external_downloader', metavar='COMMAND',
- help='Use the specified external downloader. '
- 'Currently supports %s' % ','.join(list_external_downloaders()))
- downloader.add_option(
- '--external-downloader-args',
- dest='external_downloader_args', metavar='ARGS',
- help='Give these arguments to the external downloader')
-
- workarounds = optparse.OptionGroup(parser, 'Workarounds')
- workarounds.add_option(
- '--encoding',
- dest='encoding', metavar='ENCODING',
- help='Force the specified encoding (experimental)')
- workarounds.add_option(
- '--no-check-certificate',
- action='store_true', dest='no_check_certificate', default=False,
- help='Suppress HTTPS certificate validation')
- workarounds.add_option(
- '--prefer-insecure',
- '--prefer-unsecure', action='store_true', dest='prefer_insecure',
- help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
- workarounds.add_option(
- '--user-agent',
- metavar='UA', dest='user_agent',
- help='Specify a custom user agent')
- workarounds.add_option(
- '--referer',
- metavar='URL', dest='referer', default=None,
- help='Specify a custom referer, use if the video access is restricted to one domain',
- )
- workarounds.add_option(
- '--add-header',
- metavar='FIELD:VALUE', dest='headers', action='append',
- help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
- )
- workarounds.add_option(
- '--bidi-workaround',
- dest='bidi_workaround', action='store_true',
- help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
- workarounds.add_option(
- '--sleep-interval', '--min-sleep-interval', metavar='SECONDS',
- dest='sleep_interval', type=float,
- help=(
- 'Number of seconds to sleep before each download when used alone '
- 'or a lower bound of a range for randomized sleep before each download '
- '(minimum possible number of seconds to sleep) when used along with '
- '--max-sleep-interval.'))
- workarounds.add_option(
- '--max-sleep-interval', metavar='SECONDS',
- dest='max_sleep_interval', type=float,
- help=(
- 'Upper bound of a range for randomized sleep before each download '
- '(maximum possible number of seconds to sleep). Must only be used '
- 'along with --min-sleep-interval.'))
-
- verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
- verbosity.add_option(
- '-q', '--quiet',
- action='store_true', dest='quiet', default=False,
- help='Activate quiet mode')
- verbosity.add_option(
- '--no-warnings',
- dest='no_warnings', action='store_true', default=False,
- help='Ignore warnings')
- verbosity.add_option(
- '-s', '--simulate',
- action='store_true', dest='simulate', default=False,
- help='Do not download the video and do not write anything to disk')
- verbosity.add_option(
- '--skip-download',
- action='store_true', dest='skip_download', default=False,
- help='Do not download the video')
- verbosity.add_option(
- '-g', '--get-url',
- action='store_true', dest='geturl', default=False,
- help='Simulate, quiet but print URL')
- verbosity.add_option(
- '-e', '--get-title',
- action='store_true', dest='gettitle', default=False,
- help='Simulate, quiet but print title')
- verbosity.add_option(
- '--get-id',
- action='store_true', dest='getid', default=False,
- help='Simulate, quiet but print id')
- verbosity.add_option(
- '--get-thumbnail',
- action='store_true', dest='getthumbnail', default=False,
- help='Simulate, quiet but print thumbnail URL')
- verbosity.add_option(
- '--get-description',
- action='store_true', dest='getdescription', default=False,
- help='Simulate, quiet but print video description')
- verbosity.add_option(
- '--get-duration',
- action='store_true', dest='getduration', default=False,
- help='Simulate, quiet but print video length')
- verbosity.add_option(
- '--get-filename',
- action='store_true', dest='getfilename', default=False,
- help='Simulate, quiet but print output filename')
- verbosity.add_option(
- '--get-format',
- action='store_true', dest='getformat', default=False,
- help='Simulate, quiet but print output format')
- verbosity.add_option(
- '-j', '--dump-json',
- action='store_true', dest='dumpjson', default=False,
- help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
- verbosity.add_option(
- '-J', '--dump-single-json',
- action='store_true', dest='dump_single_json', default=False,
- help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
- verbosity.add_option(
- '--print-json',
- action='store_true', dest='print_json', default=False,
- help='Be quiet and print the video information as JSON (video is still being downloaded).',
- )
- verbosity.add_option(
- '--newline',
- action='store_true', dest='progress_with_newline', default=False,
- help='Output progress bar as new lines')
- verbosity.add_option(
- '--no-progress',
- action='store_true', dest='noprogress', default=False,
- help='Do not print progress bar')
- verbosity.add_option(
- '--console-title',
- action='store_true', dest='consoletitle', default=False,
- help='Display progress in console titlebar')
- verbosity.add_option(
- '-v', '--verbose',
- action='store_true', dest='verbose', default=False,
- help='Print various debugging information')
- verbosity.add_option(
- '--dump-pages', '--dump-intermediate-pages',
- action='store_true', dest='dump_intermediate_pages', default=False,
- help='Print downloaded pages encoded using base64 to debug problems (very verbose)')
- verbosity.add_option(
- '--write-pages',
- action='store_true', dest='write_pages', default=False,
- help='Write downloaded intermediary pages to files in the current directory to debug problems')
- verbosity.add_option(
- '--youtube-print-sig-code',
- action='store_true', dest='youtube_print_sig_code', default=False,
- help=optparse.SUPPRESS_HELP)
- verbosity.add_option(
- '--print-traffic', '--dump-headers',
- dest='debug_printtraffic', action='store_true', default=False,
- help='Display sent and read HTTP traffic')
- verbosity.add_option(
- '-C', '--call-home',
- dest='call_home', action='store_true', default=False,
- help='Contact the youtube-dl server for debugging')
- verbosity.add_option(
- '--no-call-home',
- dest='call_home', action='store_false', default=False,
- help='Do NOT contact the youtube-dl server for debugging')
-
- filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
- filesystem.add_option(
- '-a', '--batch-file',
- dest='batchfile', metavar='FILE',
- help="File containing URLs to download ('-' for stdin), one URL per line. "
- "Lines starting with '#', ';' or ']' are considered as comments and ignored.")
- filesystem.add_option(
- '--id', default=False,
- action='store_true', dest='useid', help='Use only video ID in file name')
- filesystem.add_option(
- '-o', '--output',
- dest='outtmpl', metavar='TEMPLATE',
- help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
- filesystem.add_option(
- '--autonumber-size',
- dest='autonumber_size', metavar='NUMBER', type=int,
- help=optparse.SUPPRESS_HELP)
- filesystem.add_option(
- '--autonumber-start',
- dest='autonumber_start', metavar='NUMBER', default=1, type=int,
- help='Specify the start value for %(autonumber)s (default is %default)')
- filesystem.add_option(
- '--restrict-filenames',
- action='store_true', dest='restrictfilenames', default=False,
- help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames')
- filesystem.add_option(
- '-A', '--auto-number',
- action='store_true', dest='autonumber', default=False,
- help=optparse.SUPPRESS_HELP)
- filesystem.add_option(
- '-t', '--title',
- action='store_true', dest='usetitle', default=False,
- help=optparse.SUPPRESS_HELP)
- filesystem.add_option(
- '-l', '--literal', default=False,
- action='store_true', dest='usetitle',
- help=optparse.SUPPRESS_HELP)
- filesystem.add_option(
- '-w', '--no-overwrites',
- action='store_true', dest='nooverwrites', default=False,
- help='Do not overwrite files')
- filesystem.add_option(
- '-c', '--continue',
- action='store_true', dest='continue_dl', default=True,
- help='Force resume of partially downloaded files. By default, youtube-dl will resume downloads if possible.')
- filesystem.add_option(
- '--no-continue',
- action='store_false', dest='continue_dl',
- help='Do not resume partially downloaded files (restart from beginning)')
- filesystem.add_option(
- '--no-part',
- action='store_true', dest='nopart', default=False,
- help='Do not use .part files - write directly into output file')
- filesystem.add_option(
- '--no-mtime',
- action='store_false', dest='updatetime', default=True,
- help='Do not use the Last-modified header to set the file modification time')
- filesystem.add_option(
- '--write-description',
- action='store_true', dest='writedescription', default=False,
- help='Write video description to a .description file')
- filesystem.add_option(
- '--write-info-json',
- action='store_true', dest='writeinfojson', default=False,
- help='Write video metadata to a .info.json file')
- filesystem.add_option(
- '--write-annotations',
- action='store_true', dest='writeannotations', default=False,
- help='Write video annotations to a .annotations.xml file')
- filesystem.add_option(
- '--load-info-json', '--load-info',
- dest='load_info_filename', metavar='FILE',
- help='JSON file containing the video information (created with the "--write-info-json" option)')
- filesystem.add_option(
- '--cookies',
- dest='cookiefile', metavar='FILE',
- help='File to read cookies from and dump cookie jar in')
- filesystem.add_option(
- '--cache-dir', dest='cachedir', default=None, metavar='DIR',
- help='Location in the filesystem where youtube-dl can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dl or ~/.cache/youtube-dl . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
- filesystem.add_option(
- '--no-cache-dir', action='store_const', const=False, dest='cachedir',
- help='Disable filesystem caching')
- filesystem.add_option(
- '--rm-cache-dir',
- action='store_true', dest='rm_cachedir',
- help='Delete all filesystem cache files')
-
- thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
- thumbnail.add_option(
- '--write-thumbnail',
- action='store_true', dest='writethumbnail', default=False,
- help='Write thumbnail image to disk')
- thumbnail.add_option(
- '--write-all-thumbnails',
- action='store_true', dest='write_all_thumbnails', default=False,
- help='Write all thumbnail image formats to disk')
- thumbnail.add_option(
- '--list-thumbnails',
- action='store_true', dest='list_thumbnails', default=False,
- help='Simulate and list all available thumbnail formats')
-
- postproc = optparse.OptionGroup(parser, 'Post-processing Options')
- postproc.add_option(
- '-x', '--extract-audio',
- action='store_true', dest='extractaudio', default=False,
- help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
- postproc.add_option(
- '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
- help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
- postproc.add_option(
- '--audio-quality', metavar='QUALITY',
- dest='audioquality', default='5',
- help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
- postproc.add_option(
- '--recode-video',
- metavar='FORMAT', dest='recodevideo', default=None,
- help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)')
- postproc.add_option(
- '--postprocessor-args',
- dest='postprocessor_args', metavar='ARGS',
- help='Give these arguments to the postprocessor')
- postproc.add_option(
- '-k', '--keep-video',
- action='store_true', dest='keepvideo', default=False,
- help='Keep the video file on disk after the post-processing; the video is erased by default')
- postproc.add_option(
- '--no-post-overwrites',
- action='store_true', dest='nopostoverwrites', default=False,
- help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
- postproc.add_option(
- '--embed-subs',
- action='store_true', dest='embedsubtitles', default=False,
- help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
- postproc.add_option(
- '--embed-thumbnail',
- action='store_true', dest='embedthumbnail', default=False,
- help='Embed thumbnail in the audio as cover art')
- postproc.add_option(
- '--add-metadata',
- action='store_true', dest='addmetadata', default=False,
- help='Write metadata to the video file')
- postproc.add_option(
- '--metadata-from-title',
- metavar='FORMAT', dest='metafromtitle',
- help='Parse additional metadata like song title / artist from the video title. '
- 'The format syntax is the same as --output. Regular expression with '
- 'named capture groups may also be used. '
- 'The parsed parameters replace existing values. '
- 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
- '"Coldplay - Paradise". '
- 'Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"')
- postproc.add_option(
- '--xattrs',
- action='store_true', dest='xattrs', default=False,
- help='Write metadata to the video file\'s xattrs (using Dublin Core and XDG standards)')
- postproc.add_option(
- '--fixup',
- metavar='POLICY', dest='fixup', default='detect_or_warn',
- help='Automatically correct known faults of the file. '
- 'One of never (do nothing), warn (only emit a warning), '
- 'detect_or_warn (the default; fix file if we can, warn otherwise)')
- postproc.add_option(
- '--prefer-avconv',
- action='store_false', dest='prefer_ffmpeg',
- help='Prefer avconv over ffmpeg for running the postprocessors')
- postproc.add_option(
- '--prefer-ffmpeg',
- action='store_true', dest='prefer_ffmpeg',
- help='Prefer ffmpeg over avconv for running the postprocessors (default)')
- postproc.add_option(
- '--ffmpeg-location', '--avconv-location', metavar='PATH',
- dest='ffmpeg_location',
- help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.')
- postproc.add_option(
- '--exec',
- metavar='CMD', dest='exec_cmd',
- help='Execute a command on the file after downloading, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
- postproc.add_option(
- '--convert-subs', '--convert-subtitles',
- metavar='FORMAT', dest='convertsubtitles', default=None,
- help='Convert the subtitles to another format (currently supported: srt|ass|vtt|lrc)')
-
- parser.add_option_group(general)
- parser.add_option_group(network)
- parser.add_option_group(geo)
- parser.add_option_group(selection)
- parser.add_option_group(downloader)
- parser.add_option_group(filesystem)
- parser.add_option_group(thumbnail)
- parser.add_option_group(verbosity)
- parser.add_option_group(workarounds)
- parser.add_option_group(video_format)
- parser.add_option_group(subtitles)
- parser.add_option_group(authentication)
- parser.add_option_group(adobe_pass)
- parser.add_option_group(postproc)
-
- if overrideArguments is not None:
- opts, args = parser.parse_args(overrideArguments)
- if opts.verbose:
- write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
- else:
- def compat_conf(conf):
- if sys.version_info < (3,):
- return [a.decode(preferredencoding(), 'replace') for a in conf]
- return conf
-
- command_line_conf = compat_conf(sys.argv[1:])
- opts, args = parser.parse_args(command_line_conf)
-
- system_conf = user_conf = custom_conf = []
-
- if '--config-location' in command_line_conf:
- location = compat_expanduser(opts.config_location)
- if os.path.isdir(location):
- location = os.path.join(location, 'youtube-dl.conf')
- if not os.path.exists(location):
- parser.error('config-location %s does not exist.' % location)
- custom_conf = _readOptions(location)
- elif '--ignore-config' in command_line_conf:
- pass
- else:
- system_conf = _readOptions('/etc/youtube-dl.conf')
- if '--ignore-config' not in system_conf:
- user_conf = _readUserConf()
-
- argv = system_conf + user_conf + custom_conf + command_line_conf
- opts, args = parser.parse_args(argv)
- if opts.verbose:
- for conf_label, conf in (
- ('System config', system_conf),
- ('User config', user_conf),
- ('Custom config', custom_conf),
- ('Command-line args', command_line_conf)):
- write_string('[debug] %s: %s\n' % (conf_label, repr(_hide_login_info(conf))))
-
- return parser, opts, args
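-# A minimal sketch of the precedence implemented above (assuming neither
-# --ignore-config nor --config-location is given): parse_args() reads the
-# concatenated sources left to right, so later sources win on conflicts:
-#   /etc/youtube-dl.conf  <  user config  <  command-line arguments
-# With --config-location, only that file and the command line are read; with
-# --ignore-config, only the command line is read.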
diff --git a/youtube_dl/postprocessor/embedthumbnail.py b/youtube_dl/postprocessor/embedthumbnail.py
deleted file mode 100644
index 56be914b8..000000000
--- a/youtube_dl/postprocessor/embedthumbnail.py
+++ /dev/null
@@ -1,93 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-
-import os
-import subprocess
-
-from .ffmpeg import FFmpegPostProcessor
-
-from ..utils import (
- check_executable,
- encodeArgument,
- encodeFilename,
- PostProcessingError,
- prepend_extension,
- shell_quote
-)
-
-
-class EmbedThumbnailPPError(PostProcessingError):
- pass
-
-
-class EmbedThumbnailPP(FFmpegPostProcessor):
- def __init__(self, downloader=None, already_have_thumbnail=False):
- super(EmbedThumbnailPP, self).__init__(downloader)
- self._already_have_thumbnail = already_have_thumbnail
-
- def run(self, info):
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
-
- if not info.get('thumbnails'):
- self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
- return [], info
-
- thumbnail_filename = info['thumbnails'][-1]['filename']
-
- if not os.path.exists(encodeFilename(thumbnail_filename)):
- self._downloader.report_warning(
- 'Skipping embedding the thumbnail because the file is missing.')
- return [], info
-
- if info['ext'] == 'mp3':
- options = [
- '-c', 'copy', '-map', '0', '-map', '1',
- '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
-
- self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
-
- self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
-
- if not self._already_have_thumbnail:
- os.remove(encodeFilename(thumbnail_filename))
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- elif info['ext'] in ['m4a', 'mp4']:
- if not check_executable('AtomicParsley', ['-v']):
- raise EmbedThumbnailPPError('AtomicParsley was not found. Please install it.')
-
- cmd = [encodeFilename('AtomicParsley', True),
- encodeFilename(filename, True),
- encodeArgument('--artwork'),
- encodeFilename(thumbnail_filename, True),
- encodeArgument('-o'),
- encodeFilename(temp_filename, True)]
-
- self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
-
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
-
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = p.communicate()
-
- if p.returncode != 0:
- msg = stderr.decode('utf-8', 'replace').strip()
- raise EmbedThumbnailPPError(msg)
-
- if not self._already_have_thumbnail:
- os.remove(encodeFilename(thumbnail_filename))
- # for formats that don't support thumbnails (like 3gp) AtomicParsley
- # won't create the temporary file
- if b'No changes' in stdout:
- self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
- else:
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- else:
- raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
-
- return [], info
diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py
deleted file mode 100644
index 70416c25e..000000000
--- a/youtube_dl/postprocessor/ffmpeg.py
+++ /dev/null
@@ -1,646 +0,0 @@
-from __future__ import unicode_literals
-
-import io
-import os
-import subprocess
-import time
-import re
-
-
-from .common import AudioConversionError, PostProcessor
-
-from ..utils import (
- encodeArgument,
- encodeFilename,
- get_exe_version,
- is_outdated_version,
- PostProcessingError,
- prepend_extension,
- shell_quote,
- subtitles_filename,
- dfxp2srt,
- ISO639Utils,
- replace_extension,
-)
-
-
-EXT_TO_OUT_FORMATS = {
- 'aac': 'adts',
- 'flac': 'flac',
- 'm4a': 'ipod',
- 'mka': 'matroska',
- 'mkv': 'matroska',
- 'mpg': 'mpeg',
- 'ogv': 'ogg',
- 'ts': 'mpegts',
- 'wma': 'asf',
- 'wmv': 'asf',
-}
-ACODECS = {
- 'mp3': 'libmp3lame',
- 'aac': 'aac',
- 'flac': 'flac',
- 'm4a': 'aac',
- 'opus': 'libopus',
- 'vorbis': 'libvorbis',
- 'wav': None,
-}
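-# These tables map target extensions to ffmpeg muxer ('-f') and encoder
-# ('-acodec') names, which do not always match the extension itself. An
-# illustrative invocation (hypothetical filenames) for the 'm4a' -> 'ipod'
-# entry:
-#   ffmpeg -i in.aac -acodec aac -f ipod out.m4a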
-
-
-class FFmpegPostProcessorError(PostProcessingError):
- pass
-
-
-class FFmpegPostProcessor(PostProcessor):
- def __init__(self, downloader=None):
- PostProcessor.__init__(self, downloader)
- self._determine_executables()
-
- def check_version(self):
- if not self.available:
- raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
-
- required_version = '10-0' if self.basename == 'avconv' else '1.0'
- if is_outdated_version(
- self._versions[self.basename], required_version):
- warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
- self.basename, self.basename, required_version)
- if self._downloader:
- self._downloader.report_warning(warning)
-
- @staticmethod
- def get_versions(downloader=None):
- return FFmpegPostProcessor(downloader)._versions
-
- def _determine_executables(self):
- programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
- prefer_ffmpeg = True
-
- def get_ffmpeg_version(path):
- ver = get_exe_version(path, args=['-version'])
- if ver:
- regexs = [
- r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
- r'n([0-9.]+)$', # Arch Linux
- # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
- ]
- for regex in regexs:
- mobj = re.match(regex, ver)
- if mobj:
- ver = mobj.group(1)
- return ver
-
- self.basename = None
- self.probe_basename = None
-
- self._paths = None
- self._versions = None
- if self._downloader:
- prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
- location = self._downloader.params.get('ffmpeg_location')
- if location is not None:
- if not os.path.exists(location):
- self._downloader.report_warning(
- 'ffmpeg-location %s does not exist! '
- 'Continuing without avconv/ffmpeg.' % (location))
- self._versions = {}
- return
- elif not os.path.isdir(location):
- basename = os.path.splitext(os.path.basename(location))[0]
- if basename not in programs:
- self._downloader.report_warning(
- 'Cannot identify executable %s; its basename should be one of %s. '
- 'Continuing without avconv/ffmpeg.' %
- (location, ', '.join(programs)))
- self._versions = {}
- return None
- location = os.path.dirname(os.path.abspath(location))
- if basename in ('ffmpeg', 'ffprobe'):
- prefer_ffmpeg = True
-
- self._paths = dict(
- (p, os.path.join(location, p)) for p in programs)
- self._versions = dict(
- (p, get_ffmpeg_version(self._paths[p])) for p in programs)
- if self._versions is None:
- self._versions = dict(
- (p, get_ffmpeg_version(p)) for p in programs)
- self._paths = dict((p, p) for p in programs)
-
- if prefer_ffmpeg is False:
- prefs = ('avconv', 'ffmpeg')
- else:
- prefs = ('ffmpeg', 'avconv')
- for p in prefs:
- if self._versions[p]:
- self.basename = p
- break
-
- if prefer_ffmpeg is False:
- prefs = ('avprobe', 'ffprobe')
- else:
- prefs = ('ffprobe', 'avprobe')
- for p in prefs:
- if self._versions[p]:
- self.probe_basename = p
- break
-
- @property
- def available(self):
- return self.basename is not None
-
- @property
- def executable(self):
- return self._paths[self.basename]
-
- @property
- def probe_available(self):
- return self.probe_basename is not None
-
- @property
- def probe_executable(self):
- return self._paths[self.probe_basename]
-
- def get_audio_codec(self, path):
- if not self.probe_available and not self.available:
- raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. Please install one.')
- try:
- if self.probe_available:
- cmd = [
- encodeFilename(self.probe_executable, True),
- encodeArgument('-show_streams')]
- else:
- cmd = [
- encodeFilename(self.executable, True),
- encodeArgument('-i')]
- cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen(
- '[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
- handle = subprocess.Popen(
- cmd, stderr=subprocess.PIPE,
- stdout=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout_data, stderr_data = handle.communicate()
- expected_ret = 0 if self.probe_available else 1
- if handle.wait() != expected_ret:
- return None
- except (IOError, OSError):
- return None
- output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
- if self.probe_available:
- audio_codec = None
- for line in output.split('\n'):
- if line.startswith('codec_name='):
- audio_codec = line.split('=')[1].strip()
- elif line.strip() == 'codec_type=audio' and audio_codec is not None:
- return audio_codec
- else:
- # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
- mobj = re.search(
- r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
- output)
- if mobj:
- return mobj.group(1)
- return None
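- # Illustrative (hypothetical) probe output that the two branches above parse:
- #   ffprobe -show_streams:  codec_name=aac ... codec_type=audio
- #   ffmpeg -i (on stderr):  Stream #0:1(und): Audio: aac (LC), 44100 Hz, stereo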
-
- def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
- self.check_version()
-
- oldest_mtime = min(
- os.stat(encodeFilename(path)).st_mtime for path in input_paths)
-
- opts += self._configuration_args()
-
- files_cmd = []
- for path in input_paths:
- files_cmd.extend([
- encodeArgument('-i'),
- encodeFilename(self._ffmpeg_filename_argument(path), True)
- ])
- cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
- # avconv does not have repeat option
- if self.basename == 'ffmpeg':
- cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
- cmd += (files_cmd
- + [encodeArgument(o) for o in opts]
- + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
-
- if self._downloader.params.get('verbose', False):
- self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- stdout, stderr = p.communicate()
- if p.returncode != 0:
- stderr = stderr.decode('utf-8', 'replace')
- msg = stderr.strip().split('\n')[-1]
- raise FFmpegPostProcessorError(msg)
- self.try_utime(out_path, oldest_mtime, oldest_mtime)
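- # An illustrative resulting command line (hypothetical filenames, ffmpeg
- # flavour), e.g. as built by FFmpegMergerPP below:
- #   ffmpeg -y -loglevel repeat+info -i file:video.f137.mp4 -i file:audio.f140.m4a \
- #       -c copy -map 0:v:0 -map 1:a:0 file:out.temp.mp4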
-
- def run_ffmpeg(self, path, out_path, opts):
- self.run_ffmpeg_multiple_files([path], out_path, opts)
-
- def _ffmpeg_filename_argument(self, fn):
- # Always use 'file:' because the filename may contain ':' (ffmpeg
- # interprets that as a protocol) or can start with '-' (-- is broken in
- # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
- # Also leave '-' intact in order not to break streaming to stdout.
- return 'file:' + fn if fn != '-' else fn
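- # e.g. _ffmpeg_filename_argument('-weird:name.mp4') -> 'file:-weird:name.mp4',
- # while '-' is passed through unchanged so streaming to stdout keeps working.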
-
-
-class FFmpegExtractAudioPP(FFmpegPostProcessor):
- def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
- FFmpegPostProcessor.__init__(self, downloader)
- if preferredcodec is None:
- preferredcodec = 'best'
- self._preferredcodec = preferredcodec
- self._preferredquality = preferredquality
- self._nopostoverwrites = nopostoverwrites
-
- def run_ffmpeg(self, path, out_path, codec, more_opts):
- if codec is None:
- acodec_opts = []
- else:
- acodec_opts = ['-acodec', codec]
- opts = ['-vn'] + acodec_opts + more_opts
- try:
- FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
- except FFmpegPostProcessorError as err:
- raise AudioConversionError(err.msg)
-
- def run(self, information):
- path = information['filepath']
-
- filecodec = self.get_audio_codec(path)
- if filecodec is None:
- raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
-
- more_opts = []
- if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
- if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
- # Lossless, but in another container
- acodec = 'copy'
- extension = 'm4a'
- more_opts = ['-bsf:a', 'aac_adtstoasc']
- elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
- # Lossless if possible
- acodec = 'copy'
- extension = filecodec
- if filecodec == 'aac':
- more_opts = ['-f', 'adts']
- if filecodec == 'vorbis':
- extension = 'ogg'
- else:
- # MP3 otherwise.
- acodec = 'libmp3lame'
- extension = 'mp3'
- more_opts = []
- if self._preferredquality is not None:
- if int(self._preferredquality) < 10:
- more_opts += ['-q:a', self._preferredquality]
- else:
- more_opts += ['-b:a', self._preferredquality + 'k']
- else:
- # We convert the audio (lossy if codec is lossy)
- acodec = ACODECS[self._preferredcodec]
- extension = self._preferredcodec
- more_opts = []
- if self._preferredquality is not None:
- # The opus codec doesn't support the -aq option
- if int(self._preferredquality) < 10 and extension != 'opus':
- more_opts += ['-q:a', self._preferredquality]
- else:
- more_opts += ['-b:a', self._preferredquality + 'k']
- if self._preferredcodec == 'aac':
- more_opts += ['-f', 'adts']
- if self._preferredcodec == 'm4a':
- more_opts += ['-bsf:a', 'aac_adtstoasc']
- if self._preferredcodec == 'vorbis':
- extension = 'ogg'
- if self._preferredcodec == 'wav':
- extension = 'wav'
- more_opts += ['-f', 'wav']
-
- prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
- new_path = prefix + sep + extension
-
- information['filepath'] = new_path
- information['ext'] = extension
-
- # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
- if (new_path == path
- or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
- self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
- return [], information
-
- try:
- self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
- self.run_ffmpeg(path, new_path, acodec, more_opts)
- except AudioConversionError as e:
- raise PostProcessingError(
- 'audio conversion failed: ' + e.msg)
- except Exception:
- raise PostProcessingError('error running ' + self.basename)
-
- # Try to update the date time for extracted audio file.
- if information.get('filetime') is not None:
- self.try_utime(
- new_path, time.time(), information['filetime'],
- errnote='Cannot update utime of audio file')
-
- return [path], information
-
-
-class FFmpegVideoConvertorPP(FFmpegPostProcessor):
- def __init__(self, downloader=None, preferedformat=None):
- super(FFmpegVideoConvertorPP, self).__init__(downloader)
- self._preferedformat = preferedformat
-
- def run(self, information):
- path = information['filepath']
- if information['ext'] == self._preferedformat:
- self._downloader.to_screen('[ffmpeg] Not converting video file %s - already in target format %s' % (path, self._preferedformat))
- return [], information
- options = []
- if self._preferedformat == 'avi':
- options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
- prefix, sep, ext = path.rpartition('.')
- outpath = prefix + sep + self._preferedformat
- self._downloader.to_screen('[ffmpeg] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath)
- self.run_ffmpeg(path, outpath, options)
- information['filepath'] = outpath
- information['format'] = self._preferedformat
- information['ext'] = self._preferedformat
- return [path], information
-
-
-class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
- def run(self, information):
- if information['ext'] not in ('mp4', 'webm', 'mkv'):
- self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
- return [], information
- subtitles = information.get('requested_subtitles')
- if not subtitles:
- self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
- return [], information
-
- filename = information['filepath']
-
- ext = information['ext']
- sub_langs = []
- sub_filenames = []
- webm_vtt_warn = False
-
- for lang, sub_info in subtitles.items():
- sub_ext = sub_info['ext']
- if ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'):
- sub_langs.append(lang)
- sub_filenames.append(subtitles_filename(filename, lang, sub_ext))
- else:
- if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
- webm_vtt_warn = True
- self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
-
- if not sub_langs:
- return [], information
-
- input_files = [filename] + sub_filenames
-
- opts = [
- '-map', '0',
- '-c', 'copy',
- # Don't copy the existing subtitles, we may be running the
- # postprocessor a second time
- '-map', '-0:s',
- # Don't copy Apple TV chapters track, bin_data (see #19042, #19024,
- # https://trac.ffmpeg.org/ticket/6016)
- '-map', '-0:d',
- ]
- if information['ext'] == 'mp4':
- opts += ['-c:s', 'mov_text']
- for (i, lang) in enumerate(sub_langs):
- opts.extend(['-map', '%d:0' % (i + 1)])
- lang_code = ISO639Utils.short2long(lang) or lang
- opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
-
- temp_filename = prepend_extension(filename, 'temp')
- self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
- self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- return sub_filenames, information
-
-
-class FFmpegMetadataPP(FFmpegPostProcessor):
- def run(self, info):
- metadata = {}
-
- def add(meta_list, info_list=None):
- if not info_list:
- info_list = meta_list
- if not isinstance(meta_list, (list, tuple)):
- meta_list = (meta_list,)
- if not isinstance(info_list, (list, tuple)):
- info_list = (info_list,)
- for info_f in info_list:
- if info.get(info_f) is not None:
- for meta_f in meta_list:
- metadata[meta_f] = info[info_f]
- break
-
- add('title', ('track', 'title'))
- add('date', 'upload_date')
- add(('description', 'comment'), 'description')
- add('purl', 'webpage_url')
- add('track', 'track_number')
- add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
- add('genre')
- add('album')
- add('album_artist')
- add('disc', 'disc_number')
-
- if not metadata:
- self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
- return [], info
-
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
- in_filenames = [filename]
- options = []
-
- if info['ext'] == 'm4a':
- options.extend(['-vn', '-acodec', 'copy'])
- else:
- options.extend(['-c', 'copy'])
-
- for (name, value) in metadata.items():
- options.extend(['-metadata', '%s=%s' % (name, value)])
-
- chapters = info.get('chapters', [])
- if chapters:
- metadata_filename = replace_extension(filename, 'meta')
- with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
- def ffmpeg_escape(text):
- return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
-
- metadata_file_content = ';FFMETADATA1\n'
- for chapter in chapters:
- metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
- metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
- metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
- chapter_title = chapter.get('title')
- if chapter_title:
- metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
- f.write(metadata_file_content)
- in_filenames.append(metadata_filename)
- options.extend(['-map_metadata', '1'])
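- # The generated FFMETADATA file looks like this (illustrative values):
- #   ;FFMETADATA1
- #   [CHAPTER]
- #   TIMEBASE=1/1000
- #   START=0
- #   END=90000
- #   title=Intro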
-
- self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
- self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
- if chapters:
- os.remove(metadata_filename)
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return [], info
-
-
-class FFmpegMergerPP(FFmpegPostProcessor):
- def run(self, info):
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
- args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
- self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
- self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return info['__files_to_merge'], info
-
- def can_merge(self):
- # TODO: figure out merge-capable ffmpeg version
- if self.basename != 'avconv':
- return True
-
- required_version = '10-0'
- if is_outdated_version(
- self._versions[self.basename], required_version):
- warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
- 'youtube-dl will download single file media. '
- 'Update %s to version %s or newer to fix this.') % (
- self.basename, self.basename, required_version)
- if self._downloader:
- self._downloader.report_warning(warning)
- return False
- return True
-
-
-class FFmpegFixupStretchedPP(FFmpegPostProcessor):
- def run(self, info):
- stretched_ratio = info.get('stretched_ratio')
- if stretched_ratio is None or stretched_ratio == 1:
- return [], info
-
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
-
- options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
- self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
- self.run_ffmpeg(filename, temp_filename, options)
-
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- return [], info
-
-
-class FFmpegFixupM4aPP(FFmpegPostProcessor):
- def run(self, info):
- if info.get('container') != 'm4a_dash':
- return [], info
-
- filename = info['filepath']
- temp_filename = prepend_extension(filename, 'temp')
-
- options = ['-c', 'copy', '-f', 'mp4']
- self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
- self.run_ffmpeg(filename, temp_filename, options)
-
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
- return [], info
-
-
-class FFmpegFixupM3u8PP(FFmpegPostProcessor):
- def run(self, info):
- filename = info['filepath']
- if self.get_audio_codec(filename) == 'aac':
- temp_filename = prepend_extension(filename, 'temp')
-
- options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
- self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
- self.run_ffmpeg(filename, temp_filename, options)
-
- os.remove(encodeFilename(filename))
- os.rename(encodeFilename(temp_filename), encodeFilename(filename))
- return [], info
-
-
-class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
- def __init__(self, downloader=None, format=None):
- super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
- self.format = format
-
- def run(self, info):
- subs = info.get('requested_subtitles')
- filename = info['filepath']
- new_ext = self.format
- new_format = new_ext
- if new_format == 'vtt':
- new_format = 'webvtt'
- if subs is None:
- self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
- return [], info
- self._downloader.to_screen('[ffmpeg] Converting subtitles')
- sub_filenames = []
- for lang, sub in subs.items():
- ext = sub['ext']
- if ext == new_ext:
- self._downloader.to_screen(
- '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
- continue
- old_file = subtitles_filename(filename, lang, ext)
- sub_filenames.append(old_file)
- new_file = subtitles_filename(filename, lang, new_ext)
-
- if ext in ('dfxp', 'ttml', 'tt'):
- self._downloader.report_warning(
- 'You have requested to convert dfxp (TTML) subtitles into another format, '
- 'which results in style information loss')
-
- dfxp_file = old_file
- srt_file = subtitles_filename(filename, lang, 'srt')
-
- with open(dfxp_file, 'rb') as f:
- srt_data = dfxp2srt(f.read())
-
- with io.open(srt_file, 'wt', encoding='utf-8') as f:
- f.write(srt_data)
- old_file = srt_file
-
- subs[lang] = {
- 'ext': 'srt',
- 'data': srt_data
- }
-
- if new_ext == 'srt':
- continue
- else:
- sub_filenames.append(srt_file)
-
- self.run_ffmpeg(old_file, new_file, ['-f', new_format])
-
- with io.open(new_file, 'rt', encoding='utf-8') as f:
- subs[lang] = {
- 'ext': new_ext,
- 'data': f.read(),
- }
-
- return sub_filenames, info
diff --git a/youtube_dl/update.py b/youtube_dl/update.py
deleted file mode 100644
index 002ea7f33..000000000
--- a/youtube_dl/update.py
+++ /dev/null
@@ -1,187 +0,0 @@
-from __future__ import unicode_literals
-
-import io
-import json
-import traceback
-import hashlib
-import os
-import subprocess
-import sys
-from zipimport import zipimporter
-
-from .utils import encode_compat_str
-
-from .version import __version__
-
-
-def rsa_verify(message, signature, key):
- from hashlib import sha256
- assert isinstance(message, bytes)
- byte_size = (len(bin(key[0])) - 2 + 8 - 1) // 8
- signature = ('%x' % pow(int(signature, 16), key[1], key[0])).encode()
- signature = (byte_size * 2 - len(signature)) * b'0' + signature
- asn1 = b'3031300d060960864801650304020105000420'
- asn1 += sha256(message).hexdigest().encode()
- if byte_size < len(asn1) // 2 + 11:
- return False
- expected = b'0001' + (byte_size - len(asn1) // 2 - 3) * b'ff' + b'00' + asn1
- return expected == signature
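-# rsa_verify() above is textbook PKCS#1 v1.5 verification: the signature is
-# raised to the public exponent modulo n, and the hex result must equal
-#   0001 ff..ff 00 || ASN.1 DigestInfo prefix for SHA-256 || SHA-256(message)
-# where the fixed 'asn1' constant is that DigestInfo prefix. A forged or
-# corrupted versions.json therefore fails the comparison and the update aborts.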
-
-
-def update_self(to_screen, verbose, opener):
- """Update the program file with the latest version from the repository"""
-
- UPDATE_URL = 'https://yt-dl.org/update/'
- VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
- JSON_URL = UPDATE_URL + 'versions.json'
- UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
-
- if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'):
- to_screen('It looks like you installed youtube-dl with a package manager, pip, setup.py or a tarball. Please use that to update.')
- return
-
- # Check if there is a new version
- try:
- newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
- except Exception:
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: can\'t find the current version. Please try again later.')
- return
- if newversion == __version__:
- to_screen('youtube-dl is up to date (' + __version__ + ')')
- return
-
- # Download and check versions info
- try:
- versions_info = opener.open(JSON_URL).read().decode('utf-8')
- versions_info = json.loads(versions_info)
- except Exception:
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: can\'t obtain versions info. Please try again later.')
- return
- if 'signature' not in versions_info:
- to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
- return
- signature = versions_info['signature']
- del versions_info['signature']
- if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY):
- to_screen('ERROR: the versions file signature is invalid. Aborting.')
- return
-
- version_id = versions_info['latest']
-
- def version_tuple(version_str):
- return tuple(map(int, version_str.split('.')))
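- # e.g. version_tuple('2019.04.30') == (2019, 4, 30), so tuple comparison
- # orders date-based releases correctly where plain string comparison can
- # fail (e.g. '2019.9.1' vs '2019.10.1').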
- if version_tuple(__version__) >= version_tuple(version_id):
- to_screen('youtube-dl is up to date (%s)' % __version__)
- return
-
- to_screen('Updating to version ' + version_id + ' ...')
- version = versions_info['versions'][version_id]
-
- print_notes(to_screen, versions_info['versions'])
-
- # sys.executable is set to the full pathname of the exe-file for py2exe
- filename = sys.executable if hasattr(sys, 'frozen') else sys.argv[0]
-
- if not os.access(filename, os.W_OK):
- to_screen('ERROR: no write permissions on %s' % filename)
- return
-
- # Py2EXE
- if hasattr(sys, 'frozen'):
- exe = filename
- directory = os.path.dirname(exe)
- if not os.access(directory, os.W_OK):
- to_screen('ERROR: no write permissions on %s' % directory)
- return
-
- try:
- urlh = opener.open(version['exe'][0])
- newcontent = urlh.read()
- urlh.close()
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to download latest version')
- return
-
- newcontent_hash = hashlib.sha256(newcontent).hexdigest()
- if newcontent_hash != version['exe'][1]:
- to_screen('ERROR: the downloaded file hash does not match. Aborting.')
- return
-
- try:
- with open(exe + '.new', 'wb') as outf:
- outf.write(newcontent)
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to write the new version')
- return
-
- try:
- bat = os.path.join(directory, 'youtube-dl-updater.bat')
- with io.open(bat, 'w') as batfile:
- batfile.write('''
-@echo off
-echo Waiting for file handle to be closed ...
-ping 127.0.0.1 -n 5 -w 1000 > NUL
-move /Y "%s.new" "%s" > NUL
-echo Updated youtube-dl to version %s.
-start /b "" cmd /c del "%%~f0"&exit /b"
- \n''' % (exe, exe, version_id))
-
- subprocess.Popen([bat]) # Continues to run in the background
- return # Do not show premature success messages
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to overwrite current version')
- return
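- # The .bat indirection is needed because Windows will not let a running
- # executable overwrite itself: the ping serves as a portable sleep while
- # the old process exits, after which the batch file swaps in the new exe
- # and deletes itself.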
-
- # Zip unix package
- elif isinstance(globals().get('__loader__'), zipimporter):
- try:
- urlh = opener.open(version['bin'][0])
- newcontent = urlh.read()
- urlh.close()
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to download latest version')
- return
-
- newcontent_hash = hashlib.sha256(newcontent).hexdigest()
- if newcontent_hash != version['bin'][1]:
- to_screen('ERROR: the downloaded file hash does not match. Aborting.')
- return
-
- try:
- with open(filename, 'wb') as outf:
- outf.write(newcontent)
- except (IOError, OSError):
- if verbose:
- to_screen(encode_compat_str(traceback.format_exc()))
- to_screen('ERROR: unable to overwrite current version')
- return
-
- to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
-
-
-def get_notes(versions, fromVersion):
- notes = []
- for v, vdata in sorted(versions.items()):
- if v > fromVersion:
- notes.extend(vdata.get('notes', []))
- return notes
-
-
-def print_notes(to_screen, versions, fromVersion=__version__):
- notes = get_notes(versions, fromVersion)
- if notes:
- to_screen('PLEASE NOTE:')
- for note in notes:
- to_screen(note)
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
deleted file mode 100644
index 798757241..000000000
--- a/youtube_dl/utils.py
+++ /dev/null
@@ -1,5593 +0,0 @@
-#!/usr/bin/env python
-# coding: utf-8
-
-from __future__ import unicode_literals
-
-import base64
-import binascii
-import calendar
-import codecs
-import contextlib
-import ctypes
-import datetime
-import email.utils
-import email.header
-import errno
-import functools
-import gzip
-import io
-import itertools
-import json
-import locale
-import math
-import operator
-import os
-import platform
-import random
-import re
-import socket
-import ssl
-import subprocess
-import sys
-import tempfile
-import traceback
-import xml.etree.ElementTree
-import zlib
-
-from .compat import (
- compat_HTMLParseError,
- compat_HTMLParser,
- compat_basestring,
- compat_chr,
- compat_cookiejar,
- compat_ctypes_WINFUNCTYPE,
- compat_etree_fromstring,
- compat_expanduser,
- compat_html_entities,
- compat_html_entities_html5,
- compat_http_client,
- compat_kwargs,
- compat_os_name,
- compat_parse_qs,
- compat_shlex_quote,
- compat_str,
- compat_struct_pack,
- compat_struct_unpack,
- compat_urllib_error,
- compat_urllib_parse,
- compat_urllib_parse_urlencode,
- compat_urllib_parse_urlparse,
- compat_urllib_parse_unquote_plus,
- compat_urllib_request,
- compat_urlparse,
- compat_xpath,
-)
-
-from .socks import (
- ProxyType,
- sockssocket,
-)
-
-
-def register_socks_protocols():
- # "Register" SOCKS protocols
- # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
- # URLs with protocols not in urlparse.uses_netloc are not handled correctly
- for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
- if scheme not in compat_urlparse.uses_netloc:
- compat_urlparse.uses_netloc.append(scheme)
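- # e.g. without this registration, compat_urlparse.urlparse('socks5://127.0.0.1:1080')
- # may leave netloc empty on the affected Python versions; afterwards host
- # and port are parsed as expected.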
-
-
-# This is not clearly defined otherwise
-compiled_regex_type = type(re.compile(''))
-
-
-def random_user_agent():
- _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
- _CHROME_VERSIONS = (
- '74.0.3729.129',
- '76.0.3780.3',
- '76.0.3780.2',
- '74.0.3729.128',
- '76.0.3780.1',
- '76.0.3780.0',
- '75.0.3770.15',
- '74.0.3729.127',
- '74.0.3729.126',
- '76.0.3779.1',
- '76.0.3779.0',
- '75.0.3770.14',
- '74.0.3729.125',
- '76.0.3778.1',
- '76.0.3778.0',
- '75.0.3770.13',
- '74.0.3729.124',
- '74.0.3729.123',
- '73.0.3683.121',
- '76.0.3777.1',
- '76.0.3777.0',
- '75.0.3770.12',
- '74.0.3729.122',
- '76.0.3776.4',
- '75.0.3770.11',
- '74.0.3729.121',
- '76.0.3776.3',
- '76.0.3776.2',
- '73.0.3683.120',
- '74.0.3729.120',
- '74.0.3729.119',
- '74.0.3729.118',
- '76.0.3776.1',
- '76.0.3776.0',
- '76.0.3775.5',
- '75.0.3770.10',
- '74.0.3729.117',
- '76.0.3775.4',
- '76.0.3775.3',
- '74.0.3729.116',
- '75.0.3770.9',
- '76.0.3775.2',
- '76.0.3775.1',
- '76.0.3775.0',
- '75.0.3770.8',
- '74.0.3729.115',
- '74.0.3729.114',
- '76.0.3774.1',
- '76.0.3774.0',
- '75.0.3770.7',
- '74.0.3729.113',
- '74.0.3729.112',
- '74.0.3729.111',
- '76.0.3773.1',
- '76.0.3773.0',
- '75.0.3770.6',
- '74.0.3729.110',
- '74.0.3729.109',
- '76.0.3772.1',
- '76.0.3772.0',
- '75.0.3770.5',
- '74.0.3729.108',
- '74.0.3729.107',
- '76.0.3771.1',
- '76.0.3771.0',
- '75.0.3770.4',
- '74.0.3729.106',
- '74.0.3729.105',
- '75.0.3770.3',
- '74.0.3729.104',
- '74.0.3729.103',
- '74.0.3729.102',
- '75.0.3770.2',
- '74.0.3729.101',
- '75.0.3770.1',
- '75.0.3770.0',
- '74.0.3729.100',
- '75.0.3769.5',
- '75.0.3769.4',
- '74.0.3729.99',
- '75.0.3769.3',
- '75.0.3769.2',
- '75.0.3768.6',
- '74.0.3729.98',
- '75.0.3769.1',
- '75.0.3769.0',
- '74.0.3729.97',
- '73.0.3683.119',
- '73.0.3683.118',
- '74.0.3729.96',
- '75.0.3768.5',
- '75.0.3768.4',
- '75.0.3768.3',
- '75.0.3768.2',
- '74.0.3729.95',
- '74.0.3729.94',
- '75.0.3768.1',
- '75.0.3768.0',
- '74.0.3729.93',
- '74.0.3729.92',
- '73.0.3683.117',
- '74.0.3729.91',
- '75.0.3766.3',
- '74.0.3729.90',
- '75.0.3767.2',
- '75.0.3767.1',
- '75.0.3767.0',
- '74.0.3729.89',
- '73.0.3683.116',
- '75.0.3766.2',
- '74.0.3729.88',
- '75.0.3766.1',
- '75.0.3766.0',
- '74.0.3729.87',
- '73.0.3683.115',
- '74.0.3729.86',
- '75.0.3765.1',
- '75.0.3765.0',
- '74.0.3729.85',
- '73.0.3683.114',
- '74.0.3729.84',
- '75.0.3764.1',
- '75.0.3764.0',
- '74.0.3729.83',
- '73.0.3683.113',
- '75.0.3763.2',
- '75.0.3761.4',
- '74.0.3729.82',
- '75.0.3763.1',
- '75.0.3763.0',
- '74.0.3729.81',
- '73.0.3683.112',
- '75.0.3762.1',
- '75.0.3762.0',
- '74.0.3729.80',
- '75.0.3761.3',
- '74.0.3729.79',
- '73.0.3683.111',
- '75.0.3761.2',
- '74.0.3729.78',
- '74.0.3729.77',
- '75.0.3761.1',
- '75.0.3761.0',
- '73.0.3683.110',
- '74.0.3729.76',
- '74.0.3729.75',
- '75.0.3760.0',
- '74.0.3729.74',
- '75.0.3759.8',
- '75.0.3759.7',
- '75.0.3759.6',
- '74.0.3729.73',
- '75.0.3759.5',
- '74.0.3729.72',
- '73.0.3683.109',
- '75.0.3759.4',
- '75.0.3759.3',
- '74.0.3729.71',
- '75.0.3759.2',
- '74.0.3729.70',
- '73.0.3683.108',
- '74.0.3729.69',
- '75.0.3759.1',
- '75.0.3759.0',
- '74.0.3729.68',
- '73.0.3683.107',
- '74.0.3729.67',
- '75.0.3758.1',
- '75.0.3758.0',
- '74.0.3729.66',
- '73.0.3683.106',
- '74.0.3729.65',
- '75.0.3757.1',
- '75.0.3757.0',
- '74.0.3729.64',
- '73.0.3683.105',
- '74.0.3729.63',
- '75.0.3756.1',
- '75.0.3756.0',
- '74.0.3729.62',
- '73.0.3683.104',
- '75.0.3755.3',
- '75.0.3755.2',
- '73.0.3683.103',
- '75.0.3755.1',
- '75.0.3755.0',
- '74.0.3729.61',
- '73.0.3683.102',
- '74.0.3729.60',
- '75.0.3754.2',
- '74.0.3729.59',
- '75.0.3753.4',
- '74.0.3729.58',
- '75.0.3754.1',
- '75.0.3754.0',
- '74.0.3729.57',
- '73.0.3683.101',
- '75.0.3753.3',
- '75.0.3752.2',
- '75.0.3753.2',
- '74.0.3729.56',
- '75.0.3753.1',
- '75.0.3753.0',
- '74.0.3729.55',
- '73.0.3683.100',
- '74.0.3729.54',
- '75.0.3752.1',
- '75.0.3752.0',
- '74.0.3729.53',
- '73.0.3683.99',
- '74.0.3729.52',
- '75.0.3751.1',
- '75.0.3751.0',
- '74.0.3729.51',
- '73.0.3683.98',
- '74.0.3729.50',
- '75.0.3750.0',
- '74.0.3729.49',
- '74.0.3729.48',
- '74.0.3729.47',
- '75.0.3749.3',
- '74.0.3729.46',
- '73.0.3683.97',
- '75.0.3749.2',
- '74.0.3729.45',
- '75.0.3749.1',
- '75.0.3749.0',
- '74.0.3729.44',
- '73.0.3683.96',
- '74.0.3729.43',
- '74.0.3729.42',
- '75.0.3748.1',
- '75.0.3748.0',
- '74.0.3729.41',
- '75.0.3747.1',
- '73.0.3683.95',
- '75.0.3746.4',
- '74.0.3729.40',
- '74.0.3729.39',
- '75.0.3747.0',
- '75.0.3746.3',
- '75.0.3746.2',
- '74.0.3729.38',
- '75.0.3746.1',
- '75.0.3746.0',
- '74.0.3729.37',
- '73.0.3683.94',
- '75.0.3745.5',
- '75.0.3745.4',
- '75.0.3745.3',
- '75.0.3745.2',
- '74.0.3729.36',
- '75.0.3745.1',
- '75.0.3745.0',
- '75.0.3744.2',
- '74.0.3729.35',
- '73.0.3683.93',
- '74.0.3729.34',
- '75.0.3744.1',
- '75.0.3744.0',
- '74.0.3729.33',
- '73.0.3683.92',
- '74.0.3729.32',
- '74.0.3729.31',
- '73.0.3683.91',
- '75.0.3741.2',
- '75.0.3740.5',
- '74.0.3729.30',
- '75.0.3741.1',
- '75.0.3741.0',
- '74.0.3729.29',
- '75.0.3740.4',
- '73.0.3683.90',
- '74.0.3729.28',
- '75.0.3740.3',
- '73.0.3683.89',
- '75.0.3740.2',
- '74.0.3729.27',
- '75.0.3740.1',
- '75.0.3740.0',
- '74.0.3729.26',
- '73.0.3683.88',
- '73.0.3683.87',
- '74.0.3729.25',
- '75.0.3739.1',
- '75.0.3739.0',
- '73.0.3683.86',
- '74.0.3729.24',
- '73.0.3683.85',
- '75.0.3738.4',
- '75.0.3738.3',
- '75.0.3738.2',
- '75.0.3738.1',
- '75.0.3738.0',
- '74.0.3729.23',
- '73.0.3683.84',
- '74.0.3729.22',
- '74.0.3729.21',
- '75.0.3737.1',
- '75.0.3737.0',
- '74.0.3729.20',
- '73.0.3683.83',
- '74.0.3729.19',
- '75.0.3736.1',
- '75.0.3736.0',
- '74.0.3729.18',
- '73.0.3683.82',
- '74.0.3729.17',
- '75.0.3735.1',
- '75.0.3735.0',
- '74.0.3729.16',
- '73.0.3683.81',
- '75.0.3734.1',
- '75.0.3734.0',
- '74.0.3729.15',
- '73.0.3683.80',
- '74.0.3729.14',
- '75.0.3733.1',
- '75.0.3733.0',
- '75.0.3732.1',
- '74.0.3729.13',
- '74.0.3729.12',
- '73.0.3683.79',
- '74.0.3729.11',
- '75.0.3732.0',
- '74.0.3729.10',
- '73.0.3683.78',
- '74.0.3729.9',
- '74.0.3729.8',
- '74.0.3729.7',
- '75.0.3731.3',
- '75.0.3731.2',
- '75.0.3731.0',
- '74.0.3729.6',
- '73.0.3683.77',
- '73.0.3683.76',
- '75.0.3730.5',
- '75.0.3730.4',
- '73.0.3683.75',
- '74.0.3729.5',
- '73.0.3683.74',
- '75.0.3730.3',
- '75.0.3730.2',
- '74.0.3729.4',
- '73.0.3683.73',
- '73.0.3683.72',
- '75.0.3730.1',
- '75.0.3730.0',
- '74.0.3729.3',
- '73.0.3683.71',
- '74.0.3729.2',
- '73.0.3683.70',
- '74.0.3729.1',
- '74.0.3729.0',
- '74.0.3726.4',
- '73.0.3683.69',
- '74.0.3726.3',
- '74.0.3728.0',
- '74.0.3726.2',
- '73.0.3683.68',
- '74.0.3726.1',
- '74.0.3726.0',
- '74.0.3725.4',
- '73.0.3683.67',
- '73.0.3683.66',
- '74.0.3725.3',
- '74.0.3725.2',
- '74.0.3725.1',
- '74.0.3724.8',
- '74.0.3725.0',
- '73.0.3683.65',
- '74.0.3724.7',
- '74.0.3724.6',
- '74.0.3724.5',
- '74.0.3724.4',
- '74.0.3724.3',
- '74.0.3724.2',
- '74.0.3724.1',
- '74.0.3724.0',
- '73.0.3683.64',
- '74.0.3723.1',
- '74.0.3723.0',
- '73.0.3683.63',
- '74.0.3722.1',
- '74.0.3722.0',
- '73.0.3683.62',
- '74.0.3718.9',
- '74.0.3702.3',
- '74.0.3721.3',
- '74.0.3721.2',
- '74.0.3721.1',
- '74.0.3721.0',
- '74.0.3720.6',
- '73.0.3683.61',
- '72.0.3626.122',
- '73.0.3683.60',
- '74.0.3720.5',
- '72.0.3626.121',
- '74.0.3718.8',
- '74.0.3720.4',
- '74.0.3720.3',
- '74.0.3718.7',
- '74.0.3720.2',
- '74.0.3720.1',
- '74.0.3720.0',
- '74.0.3718.6',
- '74.0.3719.5',
- '73.0.3683.59',
- '74.0.3718.5',
- '74.0.3718.4',
- '74.0.3719.4',
- '74.0.3719.3',
- '74.0.3719.2',
- '74.0.3719.1',
- '73.0.3683.58',
- '74.0.3719.0',
- '73.0.3683.57',
- '73.0.3683.56',
- '74.0.3718.3',
- '73.0.3683.55',
- '74.0.3718.2',
- '74.0.3718.1',
- '74.0.3718.0',
- '73.0.3683.54',
- '74.0.3717.2',
- '73.0.3683.53',
- '74.0.3717.1',
- '74.0.3717.0',
- '73.0.3683.52',
- '74.0.3716.1',
- '74.0.3716.0',
- '73.0.3683.51',
- '74.0.3715.1',
- '74.0.3715.0',
- '73.0.3683.50',
- '74.0.3711.2',
- '74.0.3714.2',
- '74.0.3713.3',
- '74.0.3714.1',
- '74.0.3714.0',
- '73.0.3683.49',
- '74.0.3713.1',
- '74.0.3713.0',
- '72.0.3626.120',
- '73.0.3683.48',
- '74.0.3712.2',
- '74.0.3712.1',
- '74.0.3712.0',
- '73.0.3683.47',
- '72.0.3626.119',
- '73.0.3683.46',
- '74.0.3710.2',
- '72.0.3626.118',
- '74.0.3711.1',
- '74.0.3711.0',
- '73.0.3683.45',
- '72.0.3626.117',
- '74.0.3710.1',
- '74.0.3710.0',
- '73.0.3683.44',
- '72.0.3626.116',
- '74.0.3709.1',
- '74.0.3709.0',
- '74.0.3704.9',
- '73.0.3683.43',
- '72.0.3626.115',
- '74.0.3704.8',
- '74.0.3704.7',
- '74.0.3708.0',
- '74.0.3706.7',
- '74.0.3704.6',
- '73.0.3683.42',
- '72.0.3626.114',
- '74.0.3706.6',
- '72.0.3626.113',
- '74.0.3704.5',
- '74.0.3706.5',
- '74.0.3706.4',
- '74.0.3706.3',
- '74.0.3706.2',
- '74.0.3706.1',
- '74.0.3706.0',
- '73.0.3683.41',
- '72.0.3626.112',
- '74.0.3705.1',
- '74.0.3705.0',
- '73.0.3683.40',
- '72.0.3626.111',
- '73.0.3683.39',
- '74.0.3704.4',
- '73.0.3683.38',
- '74.0.3704.3',
- '74.0.3704.2',
- '74.0.3704.1',
- '74.0.3704.0',
- '73.0.3683.37',
- '72.0.3626.110',
- '72.0.3626.109',
- '74.0.3703.3',
- '74.0.3703.2',
- '73.0.3683.36',
- '74.0.3703.1',
- '74.0.3703.0',
- '73.0.3683.35',
- '72.0.3626.108',
- '74.0.3702.2',
- '74.0.3699.3',
- '74.0.3702.1',
- '74.0.3702.0',
- '73.0.3683.34',
- '72.0.3626.107',
- '73.0.3683.33',
- '74.0.3701.1',
- '74.0.3701.0',
- '73.0.3683.32',
- '73.0.3683.31',
- '72.0.3626.105',
- '74.0.3700.1',
- '74.0.3700.0',
- '73.0.3683.29',
- '72.0.3626.103',
- '74.0.3699.2',
- '74.0.3699.1',
- '74.0.3699.0',
- '73.0.3683.28',
- '72.0.3626.102',
- '73.0.3683.27',
- '73.0.3683.26',
- '74.0.3698.0',
- '74.0.3696.2',
- '72.0.3626.101',
- '73.0.3683.25',
- '74.0.3696.1',
- '74.0.3696.0',
- '74.0.3694.8',
- '72.0.3626.100',
- '74.0.3694.7',
- '74.0.3694.6',
- '74.0.3694.5',
- '74.0.3694.4',
- '72.0.3626.99',
- '72.0.3626.98',
- '74.0.3694.3',
- '73.0.3683.24',
- '72.0.3626.97',
- '72.0.3626.96',
- '72.0.3626.95',
- '73.0.3683.23',
- '72.0.3626.94',
- '73.0.3683.22',
- '73.0.3683.21',
- '72.0.3626.93',
- '74.0.3694.2',
- '72.0.3626.92',
- '74.0.3694.1',
- '74.0.3694.0',
- '74.0.3693.6',
- '73.0.3683.20',
- '72.0.3626.91',
- '74.0.3693.5',
- '74.0.3693.4',
- '74.0.3693.3',
- '74.0.3693.2',
- '73.0.3683.19',
- '74.0.3693.1',
- '74.0.3693.0',
- '73.0.3683.18',
- '72.0.3626.90',
- '74.0.3692.1',
- '74.0.3692.0',
- '73.0.3683.17',
- '72.0.3626.89',
- '74.0.3687.3',
- '74.0.3691.1',
- '74.0.3691.0',
- '73.0.3683.16',
- '72.0.3626.88',
- '72.0.3626.87',
- '73.0.3683.15',
- '74.0.3690.1',
- '74.0.3690.0',
- '73.0.3683.14',
- '72.0.3626.86',
- '73.0.3683.13',
- '73.0.3683.12',
- '74.0.3689.1',
- '74.0.3689.0',
- '73.0.3683.11',
- '72.0.3626.85',
- '73.0.3683.10',
- '72.0.3626.84',
- '73.0.3683.9',
- '74.0.3688.1',
- '74.0.3688.0',
- '73.0.3683.8',
- '72.0.3626.83',
- '74.0.3687.2',
- '74.0.3687.1',
- '74.0.3687.0',
- '73.0.3683.7',
- '72.0.3626.82',
- '74.0.3686.4',
- '72.0.3626.81',
- '74.0.3686.3',
- '74.0.3686.2',
- '74.0.3686.1',
- '74.0.3686.0',
- '73.0.3683.6',
- '72.0.3626.80',
- '74.0.3685.1',
- '74.0.3685.0',
- '73.0.3683.5',
- '72.0.3626.79',
- '74.0.3684.1',
- '74.0.3684.0',
- '73.0.3683.4',
- '72.0.3626.78',
- '72.0.3626.77',
- '73.0.3683.3',
- '73.0.3683.2',
- '72.0.3626.76',
- '73.0.3683.1',
- '73.0.3683.0',
- '72.0.3626.75',
- '71.0.3578.141',
- '73.0.3682.1',
- '73.0.3682.0',
- '72.0.3626.74',
- '71.0.3578.140',
- '73.0.3681.4',
- '73.0.3681.3',
- '73.0.3681.2',
- '73.0.3681.1',
- '73.0.3681.0',
- '72.0.3626.73',
- '71.0.3578.139',
- '72.0.3626.72',
- '72.0.3626.71',
- '73.0.3680.1',
- '73.0.3680.0',
- '72.0.3626.70',
- '71.0.3578.138',
- '73.0.3678.2',
- '73.0.3679.1',
- '73.0.3679.0',
- '72.0.3626.69',
- '71.0.3578.137',
- '73.0.3678.1',
- '73.0.3678.0',
- '71.0.3578.136',
- '73.0.3677.1',
- '73.0.3677.0',
- '72.0.3626.68',
- '72.0.3626.67',
- '71.0.3578.135',
- '73.0.3676.1',
- '73.0.3676.0',
- '73.0.3674.2',
- '72.0.3626.66',
- '71.0.3578.134',
- '73.0.3674.1',
- '73.0.3674.0',
- '72.0.3626.65',
- '71.0.3578.133',
- '73.0.3673.2',
- '73.0.3673.1',
- '73.0.3673.0',
- '72.0.3626.64',
- '71.0.3578.132',
- '72.0.3626.63',
- '72.0.3626.62',
- '72.0.3626.61',
- '72.0.3626.60',
- '73.0.3672.1',
- '73.0.3672.0',
- '72.0.3626.59',
- '71.0.3578.131',
- '73.0.3671.3',
- '73.0.3671.2',
- '73.0.3671.1',
- '73.0.3671.0',
- '72.0.3626.58',
- '71.0.3578.130',
- '73.0.3670.1',
- '73.0.3670.0',
- '72.0.3626.57',
- '71.0.3578.129',
- '73.0.3669.1',
- '73.0.3669.0',
- '72.0.3626.56',
- '71.0.3578.128',
- '73.0.3668.2',
- '73.0.3668.1',
- '73.0.3668.0',
- '72.0.3626.55',
- '71.0.3578.127',
- '73.0.3667.2',
- '73.0.3667.1',
- '73.0.3667.0',
- '72.0.3626.54',
- '71.0.3578.126',
- '73.0.3666.1',
- '73.0.3666.0',
- '72.0.3626.53',
- '71.0.3578.125',
- '73.0.3665.4',
- '73.0.3665.3',
- '72.0.3626.52',
- '73.0.3665.2',
- '73.0.3664.4',
- '73.0.3665.1',
- '73.0.3665.0',
- '72.0.3626.51',
- '71.0.3578.124',
- '72.0.3626.50',
- '73.0.3664.3',
- '73.0.3664.2',
- '73.0.3664.1',
- '73.0.3664.0',
- '73.0.3663.2',
- '72.0.3626.49',
- '71.0.3578.123',
- '73.0.3663.1',
- '73.0.3663.0',
- '72.0.3626.48',
- '71.0.3578.122',
- '73.0.3662.1',
- '73.0.3662.0',
- '72.0.3626.47',
- '71.0.3578.121',
- '73.0.3661.1',
- '72.0.3626.46',
- '73.0.3661.0',
- '72.0.3626.45',
- '71.0.3578.120',
- '73.0.3660.2',
- '73.0.3660.1',
- '73.0.3660.0',
- '72.0.3626.44',
- '71.0.3578.119',
- '73.0.3659.1',
- '73.0.3659.0',
- '72.0.3626.43',
- '71.0.3578.118',
- '73.0.3658.1',
- '73.0.3658.0',
- '72.0.3626.42',
- '71.0.3578.117',
- '73.0.3657.1',
- '73.0.3657.0',
- '72.0.3626.41',
- '71.0.3578.116',
- '73.0.3656.1',
- '73.0.3656.0',
- '72.0.3626.40',
- '71.0.3578.115',
- '73.0.3655.1',
- '73.0.3655.0',
- '72.0.3626.39',
- '71.0.3578.114',
- '73.0.3654.1',
- '73.0.3654.0',
- '72.0.3626.38',
- '71.0.3578.113',
- '73.0.3653.1',
- '73.0.3653.0',
- '72.0.3626.37',
- '71.0.3578.112',
- '73.0.3652.1',
- '73.0.3652.0',
- '72.0.3626.36',
- '71.0.3578.111',
- '73.0.3651.1',
- '73.0.3651.0',
- '72.0.3626.35',
- '71.0.3578.110',
- '73.0.3650.1',
- '73.0.3650.0',
- '72.0.3626.34',
- '71.0.3578.109',
- '73.0.3649.1',
- '73.0.3649.0',
- '72.0.3626.33',
- '71.0.3578.108',
- '73.0.3648.2',
- '73.0.3648.1',
- '73.0.3648.0',
- '72.0.3626.32',
- '71.0.3578.107',
- '73.0.3647.2',
- '73.0.3647.1',
- '73.0.3647.0',
- '72.0.3626.31',
- '71.0.3578.106',
- '73.0.3635.3',
- '73.0.3646.2',
- '73.0.3646.1',
- '73.0.3646.0',
- '72.0.3626.30',
- '71.0.3578.105',
- '72.0.3626.29',
- '73.0.3645.2',
- '73.0.3645.1',
- '73.0.3645.0',
- '72.0.3626.28',
- '71.0.3578.104',
- '72.0.3626.27',
- '72.0.3626.26',
- '72.0.3626.25',
- '72.0.3626.24',
- '73.0.3644.0',
- '73.0.3643.2',
- '72.0.3626.23',
- '71.0.3578.103',
- '73.0.3643.1',
- '73.0.3643.0',
- '72.0.3626.22',
- '71.0.3578.102',
- '73.0.3642.1',
- '73.0.3642.0',
- '72.0.3626.21',
- '71.0.3578.101',
- '73.0.3641.1',
- '73.0.3641.0',
- '72.0.3626.20',
- '71.0.3578.100',
- '72.0.3626.19',
- '73.0.3640.1',
- '73.0.3640.0',
- '72.0.3626.18',
- '73.0.3639.1',
- '71.0.3578.99',
- '73.0.3639.0',
- '72.0.3626.17',
- '73.0.3638.2',
- '72.0.3626.16',
- '73.0.3638.1',
- '73.0.3638.0',
- '72.0.3626.15',
- '71.0.3578.98',
- '73.0.3635.2',
- '71.0.3578.97',
- '73.0.3637.1',
- '73.0.3637.0',
- '72.0.3626.14',
- '71.0.3578.96',
- '71.0.3578.95',
- '72.0.3626.13',
- '71.0.3578.94',
- '73.0.3636.2',
- '71.0.3578.93',
- '73.0.3636.1',
- '73.0.3636.0',
- '72.0.3626.12',
- '71.0.3578.92',
- '73.0.3635.1',
- '73.0.3635.0',
- '72.0.3626.11',
- '71.0.3578.91',
- '73.0.3634.2',
- '73.0.3634.1',
- '73.0.3634.0',
- '72.0.3626.10',
- '71.0.3578.90',
- '71.0.3578.89',
- '73.0.3633.2',
- '73.0.3633.1',
- '73.0.3633.0',
- '72.0.3610.4',
- '72.0.3626.9',
- '71.0.3578.88',
- '73.0.3632.5',
- '73.0.3632.4',
- '73.0.3632.3',
- '73.0.3632.2',
- '73.0.3632.1',
- '73.0.3632.0',
- '72.0.3626.8',
- '71.0.3578.87',
- '73.0.3631.2',
- '73.0.3631.1',
- '73.0.3631.0',
- '72.0.3626.7',
- '71.0.3578.86',
- '72.0.3626.6',
- '73.0.3630.1',
- '73.0.3630.0',
- '72.0.3626.5',
- '71.0.3578.85',
- '72.0.3626.4',
- '73.0.3628.3',
- '73.0.3628.2',
- '73.0.3629.1',
- '73.0.3629.0',
- '72.0.3626.3',
- '71.0.3578.84',
- '73.0.3628.1',
- '73.0.3628.0',
- '71.0.3578.83',
- '73.0.3627.1',
- '73.0.3627.0',
- '72.0.3626.2',
- '71.0.3578.82',
- '71.0.3578.81',
- '71.0.3578.80',
- '72.0.3626.1',
- '72.0.3626.0',
- '71.0.3578.79',
- '70.0.3538.124',
- '71.0.3578.78',
- '72.0.3623.4',
- '72.0.3625.2',
- '72.0.3625.1',
- '72.0.3625.0',
- '71.0.3578.77',
- '70.0.3538.123',
- '72.0.3624.4',
- '72.0.3624.3',
- '72.0.3624.2',
- '71.0.3578.76',
- '72.0.3624.1',
- '72.0.3624.0',
- '72.0.3623.3',
- '71.0.3578.75',
- '70.0.3538.122',
- '71.0.3578.74',
- '72.0.3623.2',
- '72.0.3610.3',
- '72.0.3623.1',
- '72.0.3623.0',
- '72.0.3622.3',
- '72.0.3622.2',
- '71.0.3578.73',
- '70.0.3538.121',
- '72.0.3622.1',
- '72.0.3622.0',
- '71.0.3578.72',
- '70.0.3538.120',
- '72.0.3621.1',
- '72.0.3621.0',
- '71.0.3578.71',
- '70.0.3538.119',
- '72.0.3620.1',
- '72.0.3620.0',
- '71.0.3578.70',
- '70.0.3538.118',
- '71.0.3578.69',
- '72.0.3619.1',
- '72.0.3619.0',
- '71.0.3578.68',
- '70.0.3538.117',
- '71.0.3578.67',
- '72.0.3618.1',
- '72.0.3618.0',
- '71.0.3578.66',
- '70.0.3538.116',
- '72.0.3617.1',
- '72.0.3617.0',
- '71.0.3578.65',
- '70.0.3538.115',
- '72.0.3602.3',
- '71.0.3578.64',
- '72.0.3616.1',
- '72.0.3616.0',
- '71.0.3578.63',
- '70.0.3538.114',
- '71.0.3578.62',
- '72.0.3615.1',
- '72.0.3615.0',
- '71.0.3578.61',
- '70.0.3538.113',
- '72.0.3614.1',
- '72.0.3614.0',
- '71.0.3578.60',
- '70.0.3538.112',
- '72.0.3613.1',
- '72.0.3613.0',
- '71.0.3578.59',
- '70.0.3538.111',
- '72.0.3612.2',
- '72.0.3612.1',
- '72.0.3612.0',
- '70.0.3538.110',
- '71.0.3578.58',
- '70.0.3538.109',
- '72.0.3611.2',
- '72.0.3611.1',
- '72.0.3611.0',
- '71.0.3578.57',
- '70.0.3538.108',
- '72.0.3610.2',
- '71.0.3578.56',
- '71.0.3578.55',
- '72.0.3610.1',
- '72.0.3610.0',
- '71.0.3578.54',
- '70.0.3538.107',
- '71.0.3578.53',
- '72.0.3609.3',
- '71.0.3578.52',
- '72.0.3609.2',
- '71.0.3578.51',
- '72.0.3608.5',
- '72.0.3609.1',
- '72.0.3609.0',
- '71.0.3578.50',
- '70.0.3538.106',
- '72.0.3608.4',
- '72.0.3608.3',
- '72.0.3608.2',
- '71.0.3578.49',
- '72.0.3608.1',
- '72.0.3608.0',
- '70.0.3538.105',
- '71.0.3578.48',
- '72.0.3607.1',
- '72.0.3607.0',
- '71.0.3578.47',
- '70.0.3538.104',
- '72.0.3606.2',
- '72.0.3606.1',
- '72.0.3606.0',
- '71.0.3578.46',
- '70.0.3538.103',
- '70.0.3538.102',
- '72.0.3605.3',
- '72.0.3605.2',
- '72.0.3605.1',
- '72.0.3605.0',
- '71.0.3578.45',
- '70.0.3538.101',
- '71.0.3578.44',
- '71.0.3578.43',
- '70.0.3538.100',
- '70.0.3538.99',
- '71.0.3578.42',
- '72.0.3604.1',
- '72.0.3604.0',
- '71.0.3578.41',
- '70.0.3538.98',
- '71.0.3578.40',
- '72.0.3603.2',
- '72.0.3603.1',
- '72.0.3603.0',
- '71.0.3578.39',
- '70.0.3538.97',
- '72.0.3602.2',
- '71.0.3578.38',
- '71.0.3578.37',
- '72.0.3602.1',
- '72.0.3602.0',
- '71.0.3578.36',
- '70.0.3538.96',
- '72.0.3601.1',
- '72.0.3601.0',
- '71.0.3578.35',
- '70.0.3538.95',
- '72.0.3600.1',
- '72.0.3600.0',
- '71.0.3578.34',
- '70.0.3538.94',
- '72.0.3599.3',
- '72.0.3599.2',
- '72.0.3599.1',
- '72.0.3599.0',
- '71.0.3578.33',
- '70.0.3538.93',
- '72.0.3598.1',
- '72.0.3598.0',
- '71.0.3578.32',
- '70.0.3538.87',
- '72.0.3597.1',
- '72.0.3597.0',
- '72.0.3596.2',
- '71.0.3578.31',
- '70.0.3538.86',
- '71.0.3578.30',
- '71.0.3578.29',
- '72.0.3596.1',
- '72.0.3596.0',
- '71.0.3578.28',
- '70.0.3538.85',
- '72.0.3595.2',
- '72.0.3591.3',
- '72.0.3595.1',
- '72.0.3595.0',
- '71.0.3578.27',
- '70.0.3538.84',
- '72.0.3594.1',
- '72.0.3594.0',
- '71.0.3578.26',
- '70.0.3538.83',
- '72.0.3593.2',
- '72.0.3593.1',
- '72.0.3593.0',
- '71.0.3578.25',
- '70.0.3538.82',
- '72.0.3589.3',
- '72.0.3592.2',
- '72.0.3592.1',
- '72.0.3592.0',
- '71.0.3578.24',
- '72.0.3589.2',
- '70.0.3538.81',
- '70.0.3538.80',
- '72.0.3591.2',
- '72.0.3591.1',
- '72.0.3591.0',
- '71.0.3578.23',
- '70.0.3538.79',
- '71.0.3578.22',
- '72.0.3590.1',
- '72.0.3590.0',
- '71.0.3578.21',
- '70.0.3538.78',
- '70.0.3538.77',
- '72.0.3589.1',
- '72.0.3589.0',
- '71.0.3578.20',
- '70.0.3538.76',
- '71.0.3578.19',
- '70.0.3538.75',
- '72.0.3588.1',
- '72.0.3588.0',
- '71.0.3578.18',
- '70.0.3538.74',
- '72.0.3586.2',
- '72.0.3587.0',
- '71.0.3578.17',
- '70.0.3538.73',
- '72.0.3586.1',
- '72.0.3586.0',
- '71.0.3578.16',
- '70.0.3538.72',
- '72.0.3585.1',
- '72.0.3585.0',
- '71.0.3578.15',
- '70.0.3538.71',
- '71.0.3578.14',
- '72.0.3584.1',
- '72.0.3584.0',
- '71.0.3578.13',
- '70.0.3538.70',
- '72.0.3583.2',
- '71.0.3578.12',
- '72.0.3583.1',
- '72.0.3583.0',
- '71.0.3578.11',
- '70.0.3538.69',
- '71.0.3578.10',
- '72.0.3582.0',
- '72.0.3581.4',
- '71.0.3578.9',
- '70.0.3538.67',
- '72.0.3581.3',
- '72.0.3581.2',
- '72.0.3581.1',
- '72.0.3581.0',
- '71.0.3578.8',
- '70.0.3538.66',
- '72.0.3580.1',
- '72.0.3580.0',
- '71.0.3578.7',
- '70.0.3538.65',
- '71.0.3578.6',
- '72.0.3579.1',
- '72.0.3579.0',
- '71.0.3578.5',
- '70.0.3538.64',
- '71.0.3578.4',
- '71.0.3578.3',
- '71.0.3578.2',
- '71.0.3578.1',
- '71.0.3578.0',
- '70.0.3538.63',
- '69.0.3497.128',
- '70.0.3538.62',
- '70.0.3538.61',
- '70.0.3538.60',
- '70.0.3538.59',
- '71.0.3577.1',
- '71.0.3577.0',
- '70.0.3538.58',
- '69.0.3497.127',
- '71.0.3576.2',
- '71.0.3576.1',
- '71.0.3576.0',
- '70.0.3538.57',
- '70.0.3538.56',
- '71.0.3575.2',
- '70.0.3538.55',
- '69.0.3497.126',
- '70.0.3538.54',
- '71.0.3575.1',
- '71.0.3575.0',
- '71.0.3574.1',
- '71.0.3574.0',
- '70.0.3538.53',
- '69.0.3497.125',
- '70.0.3538.52',
- '71.0.3573.1',
- '71.0.3573.0',
- '70.0.3538.51',
- '69.0.3497.124',
- '71.0.3572.1',
- '71.0.3572.0',
- '70.0.3538.50',
- '69.0.3497.123',
- '71.0.3571.2',
- '70.0.3538.49',
- '69.0.3497.122',
- '71.0.3571.1',
- '71.0.3571.0',
- '70.0.3538.48',
- '69.0.3497.121',
- '71.0.3570.1',
- '71.0.3570.0',
- '70.0.3538.47',
- '69.0.3497.120',
- '71.0.3568.2',
- '71.0.3569.1',
- '71.0.3569.0',
- '70.0.3538.46',
- '69.0.3497.119',
- '70.0.3538.45',
- '71.0.3568.1',
- '71.0.3568.0',
- '70.0.3538.44',
- '69.0.3497.118',
- '70.0.3538.43',
- '70.0.3538.42',
- '71.0.3567.1',
- '71.0.3567.0',
- '70.0.3538.41',
- '69.0.3497.117',
- '71.0.3566.1',
- '71.0.3566.0',
- '70.0.3538.40',
- '69.0.3497.116',
- '71.0.3565.1',
- '71.0.3565.0',
- '70.0.3538.39',
- '69.0.3497.115',
- '71.0.3564.1',
- '71.0.3564.0',
- '70.0.3538.38',
- '69.0.3497.114',
- '71.0.3563.0',
- '71.0.3562.2',
- '70.0.3538.37',
- '69.0.3497.113',
- '70.0.3538.36',
- '70.0.3538.35',
- '71.0.3562.1',
- '71.0.3562.0',
- '70.0.3538.34',
- '69.0.3497.112',
- '70.0.3538.33',
- '71.0.3561.1',
- '71.0.3561.0',
- '70.0.3538.32',
- '69.0.3497.111',
- '71.0.3559.6',
- '71.0.3560.1',
- '71.0.3560.0',
- '71.0.3559.5',
- '71.0.3559.4',
- '70.0.3538.31',
- '69.0.3497.110',
- '71.0.3559.3',
- '70.0.3538.30',
- '69.0.3497.109',
- '71.0.3559.2',
- '71.0.3559.1',
- '71.0.3559.0',
- '70.0.3538.29',
- '69.0.3497.108',
- '71.0.3558.2',
- '71.0.3558.1',
- '71.0.3558.0',
- '70.0.3538.28',
- '69.0.3497.107',
- '71.0.3557.2',
- '71.0.3557.1',
- '71.0.3557.0',
- '70.0.3538.27',
- '69.0.3497.106',
- '71.0.3554.4',
- '70.0.3538.26',
- '71.0.3556.1',
- '71.0.3556.0',
- '70.0.3538.25',
- '71.0.3554.3',
- '69.0.3497.105',
- '71.0.3554.2',
- '70.0.3538.24',
- '69.0.3497.104',
- '71.0.3555.2',
- '70.0.3538.23',
- '71.0.3555.1',
- '71.0.3555.0',
- '70.0.3538.22',
- '69.0.3497.103',
- '71.0.3554.1',
- '71.0.3554.0',
- '70.0.3538.21',
- '69.0.3497.102',
- '71.0.3553.3',
- '70.0.3538.20',
- '69.0.3497.101',
- '71.0.3553.2',
- '69.0.3497.100',
- '71.0.3553.1',
- '71.0.3553.0',
- '70.0.3538.19',
- '69.0.3497.99',
- '69.0.3497.98',
- '69.0.3497.97',
- '71.0.3552.6',
- '71.0.3552.5',
- '71.0.3552.4',
- '71.0.3552.3',
- '71.0.3552.2',
- '71.0.3552.1',
- '71.0.3552.0',
- '70.0.3538.18',
- '69.0.3497.96',
- '71.0.3551.3',
- '71.0.3551.2',
- '71.0.3551.1',
- '71.0.3551.0',
- '70.0.3538.17',
- '69.0.3497.95',
- '71.0.3550.3',
- '71.0.3550.2',
- '71.0.3550.1',
- '71.0.3550.0',
- '70.0.3538.16',
- '69.0.3497.94',
- '71.0.3549.1',
- '71.0.3549.0',
- '70.0.3538.15',
- '69.0.3497.93',
- '69.0.3497.92',
- '71.0.3548.1',
- '71.0.3548.0',
- '70.0.3538.14',
- '69.0.3497.91',
- '71.0.3547.1',
- '71.0.3547.0',
- '70.0.3538.13',
- '69.0.3497.90',
- '71.0.3546.2',
- '69.0.3497.89',
- '71.0.3546.1',
- '71.0.3546.0',
- '70.0.3538.12',
- '69.0.3497.88',
- '71.0.3545.4',
- '71.0.3545.3',
- '71.0.3545.2',
- '71.0.3545.1',
- '71.0.3545.0',
- '70.0.3538.11',
- '69.0.3497.87',
- '71.0.3544.5',
- '71.0.3544.4',
- '71.0.3544.3',
- '71.0.3544.2',
- '71.0.3544.1',
- '71.0.3544.0',
- '69.0.3497.86',
- '70.0.3538.10',
- '69.0.3497.85',
- '70.0.3538.9',
- '69.0.3497.84',
- '71.0.3543.4',
- '70.0.3538.8',
- '71.0.3543.3',
- '71.0.3543.2',
- '71.0.3543.1',
- '71.0.3543.0',
- '70.0.3538.7',
- '69.0.3497.83',
- '71.0.3542.2',
- '71.0.3542.1',
- '71.0.3542.0',
- '70.0.3538.6',
- '69.0.3497.82',
- '69.0.3497.81',
- '71.0.3541.1',
- '71.0.3541.0',
- '70.0.3538.5',
- '69.0.3497.80',
- '71.0.3540.1',
- '71.0.3540.0',
- '70.0.3538.4',
- '69.0.3497.79',
- '70.0.3538.3',
- '71.0.3539.1',
- '71.0.3539.0',
- '69.0.3497.78',
- '68.0.3440.134',
- '69.0.3497.77',
- '70.0.3538.2',
- '70.0.3538.1',
- '70.0.3538.0',
- '69.0.3497.76',
- '68.0.3440.133',
- '69.0.3497.75',
- '70.0.3537.2',
- '70.0.3537.1',
- '70.0.3537.0',
- '69.0.3497.74',
- '68.0.3440.132',
- '70.0.3536.0',
- '70.0.3535.5',
- '70.0.3535.4',
- '70.0.3535.3',
- '69.0.3497.73',
- '68.0.3440.131',
- '70.0.3532.8',
- '70.0.3532.7',
- '69.0.3497.72',
- '69.0.3497.71',
- '70.0.3535.2',
- '70.0.3535.1',
- '70.0.3535.0',
- '69.0.3497.70',
- '68.0.3440.130',
- '69.0.3497.69',
- '68.0.3440.129',
- '70.0.3534.4',
- '70.0.3534.3',
- '70.0.3534.2',
- '70.0.3534.1',
- '70.0.3534.0',
- '69.0.3497.68',
- '68.0.3440.128',
- '70.0.3533.2',
- '70.0.3533.1',
- '70.0.3533.0',
- '69.0.3497.67',
- '68.0.3440.127',
- '70.0.3532.6',
- '70.0.3532.5',
- '70.0.3532.4',
- '69.0.3497.66',
- '68.0.3440.126',
- '70.0.3532.3',
- '70.0.3532.2',
- '70.0.3532.1',
- '69.0.3497.60',
- '69.0.3497.65',
- '69.0.3497.64',
- '70.0.3532.0',
- '70.0.3531.0',
- '70.0.3530.4',
- '70.0.3530.3',
- '70.0.3530.2',
- '69.0.3497.58',
- '68.0.3440.125',
- '69.0.3497.57',
- '69.0.3497.56',
- '69.0.3497.55',
- '69.0.3497.54',
- '70.0.3530.1',
- '70.0.3530.0',
- '69.0.3497.53',
- '68.0.3440.124',
- '69.0.3497.52',
- '70.0.3529.3',
- '70.0.3529.2',
- '70.0.3529.1',
- '70.0.3529.0',
- '69.0.3497.51',
- '70.0.3528.4',
- '68.0.3440.123',
- '70.0.3528.3',
- '70.0.3528.2',
- '70.0.3528.1',
- '70.0.3528.0',
- '69.0.3497.50',
- '68.0.3440.122',
- '70.0.3527.1',
- '70.0.3527.0',
- '69.0.3497.49',
- '68.0.3440.121',
- '70.0.3526.1',
- '70.0.3526.0',
- '68.0.3440.120',
- '69.0.3497.48',
- '69.0.3497.47',
- '68.0.3440.119',
- '68.0.3440.118',
- '70.0.3525.5',
- '70.0.3525.4',
- '70.0.3525.3',
- '68.0.3440.117',
- '69.0.3497.46',
- '70.0.3525.2',
- '70.0.3525.1',
- '70.0.3525.0',
- '69.0.3497.45',
- '68.0.3440.116',
- '70.0.3524.4',
- '70.0.3524.3',
- '69.0.3497.44',
- '70.0.3524.2',
- '70.0.3524.1',
- '70.0.3524.0',
- '70.0.3523.2',
- '69.0.3497.43',
- '68.0.3440.115',
- '70.0.3505.9',
- '69.0.3497.42',
- '70.0.3505.8',
- '70.0.3523.1',
- '70.0.3523.0',
- '69.0.3497.41',
- '68.0.3440.114',
- '70.0.3505.7',
- '69.0.3497.40',
- '70.0.3522.1',
- '70.0.3522.0',
- '70.0.3521.2',
- '69.0.3497.39',
- '68.0.3440.113',
- '70.0.3505.6',
- '70.0.3521.1',
- '70.0.3521.0',
- '69.0.3497.38',
- '68.0.3440.112',
- '70.0.3520.1',
- '70.0.3520.0',
- '69.0.3497.37',
- '68.0.3440.111',
- '70.0.3519.3',
- '70.0.3519.2',
- '70.0.3519.1',
- '70.0.3519.0',
- '69.0.3497.36',
- '68.0.3440.110',
- '70.0.3518.1',
- '70.0.3518.0',
- '69.0.3497.35',
- '69.0.3497.34',
- '68.0.3440.109',
- '70.0.3517.1',
- '70.0.3517.0',
- '69.0.3497.33',
- '68.0.3440.108',
- '69.0.3497.32',
- '70.0.3516.3',
- '70.0.3516.2',
- '70.0.3516.1',
- '70.0.3516.0',
- '69.0.3497.31',
- '68.0.3440.107',
- '70.0.3515.4',
- '68.0.3440.106',
- '70.0.3515.3',
- '70.0.3515.2',
- '70.0.3515.1',
- '70.0.3515.0',
- '69.0.3497.30',
- '68.0.3440.105',
- '68.0.3440.104',
- '70.0.3514.2',
- '70.0.3514.1',
- '70.0.3514.0',
- '69.0.3497.29',
- '68.0.3440.103',
- '70.0.3513.1',
- '70.0.3513.0',
- '69.0.3497.28',
- )
- return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
-
-
-std_headers = {
- 'User-Agent': random_user_agent(),
- 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
- 'Accept-Encoding': 'gzip, deflate',
- 'Accept-Language': 'en-us,en;q=0.5',
-}
-
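As a hedged sketch of the technique removed above: random_user_agent() formats one randomly chosen pinned Chrome build into a user-agent template. The template and the short version tuple below are illustrative stand-ins, not the module's actual _USER_AGENT_TPL or the full _CHROME_VERSIONS tuple:

    import random

    # Illustrative stand-ins (assumed values, not the module's own constants)
    UA_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/%s Safari/537.36'
    CHROME_VERSIONS = ('74.0.3729.169', '73.0.3683.103', '72.0.3626.121')

    def random_user_agent():
        # One random pinned Chrome build per call
        return UA_TPL % random.choice(CHROME_VERSIONS)

    print(random_user_agent())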
-
-USER_AGENTS = {
- 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
-}
-
-
-NO_DEFAULT = object()
-
-ENGLISH_MONTH_NAMES = [
- 'January', 'February', 'March', 'April', 'May', 'June',
- 'July', 'August', 'September', 'October', 'November', 'December']
-
-MONTH_NAMES = {
- 'en': ENGLISH_MONTH_NAMES,
- 'fr': [
- 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
- 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
-}
-
-KNOWN_EXTENSIONS = (
- 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
- 'flv', 'f4v', 'f4a', 'f4b',
- 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
- 'mkv', 'mka', 'mk3d',
- 'avi', 'divx',
- 'mov',
- 'asf', 'wmv', 'wma',
- '3gp', '3g2',
- 'mp3',
- 'flac',
- 'ape',
- 'wav',
- 'f4f', 'f4m', 'm3u8', 'smil')
-
-# needed for sanitizing filenames in restricted mode
-ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
- itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
- 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
-
-DATE_FORMATS = (
- '%d %B %Y',
- '%d %b %Y',
- '%B %d %Y',
- '%B %dst %Y',
- '%B %dnd %Y',
- '%B %dth %Y',
- '%b %d %Y',
- '%b %dst %Y',
- '%b %dnd %Y',
- '%b %dth %Y',
- '%b %dst %Y %I:%M',
- '%b %dnd %Y %I:%M',
- '%b %dth %Y %I:%M',
- '%Y %m %d',
- '%Y-%m-%d',
- '%Y/%m/%d',
- '%Y/%m/%d %H:%M',
- '%Y/%m/%d %H:%M:%S',
- '%Y-%m-%d %H:%M',
- '%Y-%m-%d %H:%M:%S',
- '%Y-%m-%d %H:%M:%S.%f',
- '%d.%m.%Y %H:%M',
- '%d.%m.%Y %H.%M',
- '%Y-%m-%dT%H:%M:%SZ',
- '%Y-%m-%dT%H:%M:%S.%fZ',
- '%Y-%m-%dT%H:%M:%S.%f0Z',
- '%Y-%m-%dT%H:%M:%S',
- '%Y-%m-%dT%H:%M:%S.%f',
- '%Y-%m-%dT%H:%M',
- '%b %d %Y at %H:%M',
- '%b %d %Y at %H:%M:%S',
- '%B %d %Y at %H:%M',
- '%B %d %Y at %H:%M:%S',
-)
-
-DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
-DATE_FORMATS_DAY_FIRST.extend([
- '%d-%m-%Y',
- '%d.%m.%Y',
- '%d.%m.%y',
- '%d/%m/%Y',
- '%d/%m/%y',
- '%d/%m/%Y %H:%M:%S',
-])
-
-DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
-DATE_FORMATS_MONTH_FIRST.extend([
- '%m-%d-%Y',
- '%m.%d.%Y',
- '%m/%d/%Y',
- '%m/%d/%y',
- '%m/%d/%Y %H:%M:%S',
-])
-
-PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
-
-
-def preferredencoding():
- """Get preferred encoding.
-
- Returns the best encoding scheme for the system, based on
- locale.getpreferredencoding() and some further tweaks.
- """
- try:
- pref = locale.getpreferredencoding()
- 'TEST'.encode(pref)
- except Exception:
- pref = 'UTF-8'
-
- return pref
-
-
-def write_json_file(obj, fn):
- """ Encode obj as JSON and write it to fn, atomically if possible """
-
- fn = encodeFilename(fn)
- if sys.version_info < (3, 0) and sys.platform != 'win32':
- encoding = get_filesystem_encoding()
- # os.path.basename returns a bytes object, but NamedTemporaryFile
- # will fail if the filename contains non-ASCII characters unless we
- # use a unicode object
- path_basename = lambda f: os.path.basename(f).decode(encoding)
- # the same for os.path.dirname
- path_dirname = lambda f: os.path.dirname(f).decode(encoding)
- else:
- path_basename = os.path.basename
- path_dirname = os.path.dirname
-
- args = {
- 'suffix': '.tmp',
- 'prefix': path_basename(fn) + '.',
- 'dir': path_dirname(fn),
- 'delete': False,
- }
-
- # In Python 2.x, json.dump expects a bytestream.
- # In Python 3.x, it writes to a character stream
- if sys.version_info < (3, 0):
- args['mode'] = 'wb'
- else:
- args.update({
- 'mode': 'w',
- 'encoding': 'utf-8',
- })
-
- tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
-
- try:
- with tf:
- json.dump(obj, tf)
- if sys.platform == 'win32':
- # Need to remove existing file on Windows, else os.rename raises
- # WindowsError or FileExistsError.
- try:
- os.unlink(fn)
- except OSError:
- pass
- os.rename(tf.name, fn)
- except Exception:
- try:
- os.remove(tf.name)
- except OSError:
- pass
- raise
-
-
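A minimal usage sketch of write_json_file (the file name here is invented, and the dict key order shown assumes Python 3.7+). The write goes to a NamedTemporaryFile in the target directory and is then renamed over the destination, which is what makes it atomic where the platform allows:

    >>> from youtube_dl.utils import write_json_file
    >>> write_json_file({'id': 'abc', 'title': 'Example'}, 'info.json')
    >>> open('info.json').read()
    '{"id": "abc", "title": "Example"}'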
-if sys.version_info >= (2, 7):
- def find_xpath_attr(node, xpath, key, val=None):
- """ Find the xpath xpath[@key=val] """
- assert re.match(r'^[a-zA-Z_-]+$', key)
- expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
- return node.find(expr)
-else:
- def find_xpath_attr(node, xpath, key, val=None):
- for f in node.findall(compat_xpath(xpath)):
- if key not in f.attrib:
- continue
- if val is None or f.attrib.get(key) == val:
- return f
- return None
-
-# On python2.6 the xml.etree.ElementTree.Element methods don't support
-# the namespace parameter
-
-
-def xpath_with_ns(path, ns_map):
- components = [c.split(':') for c in path.split('/')]
- replaced = []
- for c in components:
- if len(c) == 1:
- replaced.append(c[0])
- else:
- ns, tag = c
- replaced.append('{%s}%s' % (ns_map[ns], tag))
- return '/'.join(replaced)
-
-
-def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
- def _find_xpath(xpath):
- return node.find(compat_xpath(xpath))
-
- if isinstance(xpath, (str, compat_str)):
- n = _find_xpath(xpath)
- else:
- for xp in xpath:
- n = _find_xpath(xp)
- if n is not None:
- break
-
- if n is None:
- if default is not NO_DEFAULT:
- return default
- elif fatal:
- name = xpath if name is None else name
- raise ExtractorError('Could not find XML element %s' % name)
- else:
- return None
- return n
-
-
-def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
- n = xpath_element(node, xpath, name, fatal=fatal, default=default)
- if n is None or n == default:
- return n
- if n.text is None:
- if default is not NO_DEFAULT:
- return default
- elif fatal:
- name = xpath if name is None else name
- raise ExtractorError('Could not find XML element\'s text %s' % name)
- else:
- return None
- return n.text
-
-
-def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
- n = find_xpath_attr(node, xpath, key)
- if n is None:
- if default is not NO_DEFAULT:
- return default
- elif fatal:
- name = '%s[@%s]' % (xpath, key) if name is None else name
- raise ExtractorError('Could not find XML attribute %s' % name)
- else:
- return None
- return n.attrib[key]
-
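A short example of these XML helpers working together; the MRSS namespace and element are just sample data:

    import xml.etree.ElementTree as etree
    from youtube_dl.utils import xpath_text, xpath_with_ns

    doc = etree.fromstring(
        '<root xmlns:media="http://search.yahoo.com/mrss/">'
        '<media:title>Example</media:title></root>')
    NS_MAP = {'media': 'http://search.yahoo.com/mrss/'}
    # xpath_with_ns expands './media:title' to './{http://search.yahoo.com/mrss/}title'
    print(xpath_text(doc, xpath_with_ns('./media:title', NS_MAP), fatal=True))  # prints 'Example'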
-
-def get_element_by_id(id, html):
- """Return the content of the tag with the specified ID in the passed HTML document"""
- return get_element_by_attribute('id', id, html)
-
-
-def get_element_by_class(class_name, html):
- """Return the content of the first tag with the specified class in the passed HTML document"""
- retval = get_elements_by_class(class_name, html)
- return retval[0] if retval else None
-
-
-def get_element_by_attribute(attribute, value, html, escape_value=True):
- retval = get_elements_by_attribute(attribute, value, html, escape_value)
- return retval[0] if retval else None
-
-
-def get_elements_by_class(class_name, html):
- """Return the content of all tags with the specified class in the passed HTML document as a list"""
- return get_elements_by_attribute(
- 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
- html, escape_value=False)
-
-
-def get_elements_by_attribute(attribute, value, html, escape_value=True):
- """Return the content of the tag with the specified attribute in the passed HTML document"""
-
- value = re.escape(value) if escape_value else value
-
- retlist = []
- for m in re.finditer(r'''(?xs)
- <([a-zA-Z0-9:._-]+)
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
- \s+%s=['"]?%s['"]?
- (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
- \s*>
- (?P<content>.*?)
- </\1>
- ''' % (re.escape(attribute), value), html):
- res = m.group('content')
-
- if res.startswith('"') or res.startswith("'"):
- res = res[1:-1]
-
- retlist.append(unescapeHTML(res))
-
- return retlist
-
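A doctest-style illustration of the element lookups (the HTML snippet is invented for the example):

    >>> from youtube_dl.utils import get_element_by_id, get_element_by_class
    >>> html = '<div id="player" class="video title">Foo</div>'
    >>> get_element_by_id('player', html)
    'Foo'
    >>> get_element_by_class('title', html)
    'Foo'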
-
-class HTMLAttributeParser(compat_HTMLParser):
- """Trivial HTML parser to gather the attributes for a single element"""
- def __init__(self):
- self.attrs = {}
- compat_HTMLParser.__init__(self)
-
- def handle_starttag(self, tag, attrs):
- self.attrs = dict(attrs)
-
-
-def extract_attributes(html_element):
- """Given a string for an HTML element such as
- <el
- a="foo" B="bar" c="&98;az" d=boz
- empty= noval entity="&amp;"
- sq='"' dq="'"
- >
- Decode and return a dictionary of attributes.
- {
- 'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
- 'empty': '', 'noval': None, 'entity': '&',
- 'sq': '"', 'dq': '\''
- }.
- NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
- but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
- """
- parser = HTMLAttributeParser()
- try:
- parser.feed(html_element)
- parser.close()
- # Older Python may throw HTMLParseError in case of malformed HTML
- except compat_HTMLParseError:
- pass
- return parser.attrs
-
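A usage example matching the docstring above, run under Python 3, where attribute names are lower-cased and character references are decoded:

    >>> from youtube_dl.utils import extract_attributes
    >>> extract_attributes('<el a="foo" B="bar" noval entity="&amp;">')
    {'a': 'foo', 'b': 'bar', 'noval': None, 'entity': '&'}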
-
-def clean_html(html):
- """Clean an HTML snippet into a readable string"""
-
- if html is None: # Convenience for sanitizing descriptions etc.
- return html
-
- # Newline vs <br />
- html = html.replace('\n', ' ')
- html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
- html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
- # Strip html tags
- html = re.sub('<.*?>', '', html)
- # Replace html entities
- html = unescapeHTML(html)
- return html.strip()
-
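For instance, paragraph breaks survive as newlines while the tags themselves and HTML entities are cleaned away:

    >>> from youtube_dl.utils import clean_html
    >>> clean_html('<p>Foo</p>\n<p>Bar &amp; Baz</p>')
    'Foo\nBar & Baz'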
-
-def sanitize_open(filename, open_mode):
- """Try to open the given filename, and slightly tweak it if this fails.
-
- Attempts to open the given filename. If this fails, it tries to change
- the filename slightly, step by step, until it's either able to open it
- or it fails and raises a final exception, like the standard open()
- function.
-
- It returns the tuple (stream, definitive_file_name).
- """
- try:
- if filename == '-':
- if sys.platform == 'win32':
- import msvcrt
- msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
- return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
- stream = open(encodeFilename(filename), open_mode)
- return (stream, filename)
- except (IOError, OSError) as err:
- if err.errno in (errno.EACCES,):
- raise
-
- # In case of error, try to remove win32 forbidden chars
- alt_filename = sanitize_path(filename)
- if alt_filename == filename:
- raise
- else:
- # An exception here should be caught in the caller
- stream = open(encodeFilename(alt_filename), open_mode)
- return (stream, alt_filename)
-
-
-def timeconvert(timestr):
- """Convert RFC 2822 defined time string into system timestamp"""
- timestamp = None
- timetuple = email.utils.parsedate_tz(timestr)
- if timetuple is not None:
- timestamp = email.utils.mktime_tz(timetuple)
- return timestamp
-
-
-def sanitize_filename(s, restricted=False, is_id=False):
- """Sanitizes a string so it could be used as part of a filename.
- If restricted is set, use a stricter subset of allowed characters.
- Set is_id if this is not an arbitrary string, but an ID that should be kept
- if possible.
- """
- def replace_insane(char):
- if restricted and char in ACCENT_CHARS:
- return ACCENT_CHARS[char]
- if char == '?' or ord(char) < 32 or ord(char) == 127:
- return ''
- elif char == '"':
- return '' if restricted else '\''
- elif char == ':':
- return '_-' if restricted else ' -'
- elif char in '\\/|*<>':
- return '_'
- if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
- return '_'
- if restricted and ord(char) > 127:
- return '_'
- return char
-
- # Handle timestamps
- s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
- result = ''.join(map(replace_insane, s))
- if not is_id:
- while '__' in result:
- result = result.replace('__', '_')
- result = result.strip('_')
- # Common case of "Foreign band name - English song title"
- if restricted and result.startswith('-_'):
- result = result[2:]
- if result.startswith('-'):
- result = '_' + result[len('-'):]
- result = result.lstrip('.')
- if not result:
- result = '_'
- return result
-
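An example of restricted mode in action: accents are folded via ACCENT_CHARS, separators are replaced, and '?' is dropped:

    >>> from youtube_dl.utils import sanitize_filename
    >>> sanitize_filename('AC/DC: Jürgen?', restricted=True)
    'AC_DC_-_Jurgen'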
-
-def sanitize_path(s):
- """Sanitizes and normalizes path on Windows"""
- if sys.platform != 'win32':
- return s
- drive_or_unc, _ = os.path.splitdrive(s)
- if sys.version_info < (2, 7) and not drive_or_unc:
- drive_or_unc, _ = os.path.splitunc(s)
- norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
- if drive_or_unc:
- norm_path.pop(0)
- sanitized_path = [
- path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
- for path_part in norm_path]
- if drive_or_unc:
- sanitized_path.insert(0, drive_or_unc + os.path.sep)
- return os.path.join(*sanitized_path)
-
-
-def sanitize_url(url):
- # Prepend protocol-less URLs with an `http:` scheme in order to reduce
- # the number of unwanted failures due to a missing protocol
- if url.startswith('//'):
- return 'http:%s' % url
- # Fix some common typos seen so far
- COMMON_TYPOS = (
- # https://github.com/ytdl-org/youtube-dl/issues/15649
- (r'^httpss://', r'https://'),
- # https://bx1.be/lives/direct-tv/
- (r'^rmtp([es]?)://', r'rtmp\1://'),
- )
- for mistake, fixup in COMMON_TYPOS:
- if re.match(mistake, url):
- return re.sub(mistake, fixup, url)
- return url
-
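Both fixups in a doctest:

    >>> from youtube_dl.utils import sanitize_url
    >>> sanitize_url('//example.com/video')
    'http://example.com/video'
    >>> sanitize_url('httpss://example.com/video')
    'https://example.com/video'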
-
-def sanitized_Request(url, *args, **kwargs):
- return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
-
-
-def expand_path(s):
- """Expand shell variables and ~"""
- return os.path.expandvars(compat_expanduser(s))
-
-
-def orderedSet(iterable):
- """ Remove all duplicates from the input iterable """
- res = []
- for el in iterable:
- if el not in res:
- res.append(el)
- return res
-
-
-def _htmlentity_transform(entity_with_semicolon):
- """Transforms an HTML entity to a character."""
- entity = entity_with_semicolon[:-1]
-
- # Known non-numeric HTML entity
- if entity in compat_html_entities.name2codepoint:
- return compat_chr(compat_html_entities.name2codepoint[entity])
-
- # TODO: HTML5 allows entities without a semicolon. For example,
- # '&Eacuteric' should be decoded as 'Éric'.
- if entity_with_semicolon in compat_html_entities_html5:
- return compat_html_entities_html5[entity_with_semicolon]
-
- mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
- if mobj is not None:
- numstr = mobj.group(1)
- if numstr.startswith('x'):
- base = 16
- numstr = '0%s' % numstr
- else:
- base = 10
- # See https://github.com/ytdl-org/youtube-dl/issues/7518
- try:
- return compat_chr(int(numstr, base))
- except ValueError:
- pass
-
- # Unknown entity in name, return its literal representation
- return '&%s;' % entity
-
-
-def unescapeHTML(s):
- if s is None:
- return None
- assert type(s) == compat_str
-
- return re.sub(
- r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
-
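Named, decimal, and hexadecimal references are all handled:

    >>> from youtube_dl.utils import unescapeHTML
    >>> unescapeHTML('Ben &amp; Jerry&#39;s &#x263a;')
    "Ben & Jerry's ☺"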
-
-def get_subprocess_encoding():
- if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
- # For subprocess calls, encode with locale encoding
- # Refer to http://stackoverflow.com/a/9951851/35070
- encoding = preferredencoding()
- else:
- encoding = sys.getfilesystemencoding()
- if encoding is None:
- encoding = 'utf-8'
- return encoding
-
-
-def encodeFilename(s, for_subprocess=False):
- """
- @param s The name of the file
- """
-
- assert type(s) == compat_str
-
- # Python 3 has a Unicode API
- if sys.version_info >= (3, 0):
- return s
-
- # Pass '' directly to use Unicode APIs on Windows 2000 and up
- # (Detecting Windows NT 4 is tricky because 'major >= 4' would
- # match Windows 9x series as well. Besides, NT 4 is obsolete.)
- if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
- return s
-
- # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
- if sys.platform.startswith('java'):
- return s
-
- return s.encode(get_subprocess_encoding(), 'ignore')
-
-
-def decodeFilename(b, for_subprocess=False):
-
- if sys.version_info >= (3, 0):
- return b
-
- if not isinstance(b, bytes):
- return b
-
- return b.decode(get_subprocess_encoding(), 'ignore')
-
-
-def encodeArgument(s):
- if not isinstance(s, compat_str):
- # Legacy code that uses byte strings
- # Uncomment the following line after fixing all post processors
- # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
- s = s.decode('ascii')
- return encodeFilename(s, True)
-
-
-def decodeArgument(b):
- return decodeFilename(b, True)
-
-
-def decodeOption(optval):
- if optval is None:
- return optval
- if isinstance(optval, bytes):
- optval = optval.decode(preferredencoding())
-
- assert isinstance(optval, compat_str)
- return optval
-
-
-def formatSeconds(secs):
- if secs > 3600:
- return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
- elif secs > 60:
- return '%d:%02d' % (secs // 60, secs % 60)
- else:
- return '%d' % secs
-
-
-def make_HTTPS_handler(params, **kwargs):
- opts_no_check_certificate = params.get('nocheckcertificate', False)
- if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
- context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
- if opts_no_check_certificate:
- context.check_hostname = False
- context.verify_mode = ssl.CERT_NONE
- try:
- return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
- except TypeError:
- # Python 2.7.8
- # (create_default_context present but HTTPSHandler has no context=)
- pass
-
- if sys.version_info < (3, 2):
- return YoutubeDLHTTPSHandler(params, **kwargs)
- else: # Python < 3.4
- context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
- context.verify_mode = (ssl.CERT_NONE
- if opts_no_check_certificate
- else ssl.CERT_REQUIRED)
- context.set_default_verify_paths()
- return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
-
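A sketch of how the handler would be wired into an opener; the params dict is minimal and this mirrors, but is not copied from, how YoutubeDL builds its own opener:

    from youtube_dl.utils import make_HTTPS_handler
    from youtube_dl.compat import compat_urllib_request

    https_handler = make_HTTPS_handler({'nocheckcertificate': False})
    opener = compat_urllib_request.build_opener(https_handler)
    # opener.open(...) now uses the TLS context selected above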
-
-def bug_reports_message():
- if ytdl_is_updateable():
- update_cmd = 'type youtube-dl -U to update'
- else:
- update_cmd = 'see https://yt-dl.org/update on how to update'
- msg = '; please report this issue on https://yt-dl.org/bug .'
- msg += ' Make sure you are using the latest version; %s.' % update_cmd
- msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
- return msg
-
-
-class YoutubeDLError(Exception):
- """Base exception for YoutubeDL errors."""
- pass
-
-
-class ExtractorError(YoutubeDLError):
- """Error during info extraction."""
-
- def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
- """ tb, if given, is the original traceback (so that it can be printed out).
- If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
- """
-
- if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
- expected = True
- if video_id is not None:
- msg = video_id + ': ' + msg
- if cause:
- msg += ' (caused by %r)' % cause
- if not expected:
- msg += bug_reports_message()
- super(ExtractorError, self).__init__(msg)
-
- self.traceback = tb
- self.exc_info = sys.exc_info() # preserve original exception
- self.cause = cause
- self.video_id = video_id
-
- def format_traceback(self):
- if self.traceback is None:
- return None
- return ''.join(traceback.format_tb(self.traceback))
-
-
-class UnsupportedError(ExtractorError):
- def __init__(self, url):
- super(UnsupportedError, self).__init__(
- 'Unsupported URL: %s' % url, expected=True)
- self.url = url
-
-
-class RegexNotFoundError(ExtractorError):
- """Error when a regex didn't match"""
- pass
-
-
-class GeoRestrictedError(ExtractorError):
- """Geographic restriction Error exception.
-
- This exception may be thrown when a video is not available from your
- geographic location due to geographic restrictions imposed by a website.
- """
- def __init__(self, msg, countries=None):
- super(GeoRestrictedError, self).__init__(msg, expected=True)
- self.msg = msg
- self.countries = countries
-
-
-class DownloadError(YoutubeDLError):
- """Download Error exception.
-
- This exception may be thrown by FileDownloader objects if they are not
- configured to continue on errors. They will contain the appropriate
- error message.
- """
-
- def __init__(self, msg, exc_info=None):
- """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
- super(DownloadError, self).__init__(msg)
- self.exc_info = exc_info
-
-
-class SameFileError(YoutubeDLError):
- """Same File exception.
-
- This exception will be thrown by FileDownloader objects if they detect
- multiple files would have to be downloaded to the same file on disk.
- """
- pass
-
-
-class PostProcessingError(YoutubeDLError):
- """Post Processing exception.
-
- This exception may be raised by PostProcessor's .run() method to
- indicate an error in the postprocessing task.
- """
-
- def __init__(self, msg):
- super(PostProcessingError, self).__init__(msg)
- self.msg = msg
-
-
-class MaxDownloadsReached(YoutubeDLError):
- """ --max-downloads limit has been reached. """
- pass
-
-
-class UnavailableVideoError(YoutubeDLError):
- """Unavailable Format exception.
-
- This exception will be thrown when a video is requested
- in a format that is not available for that video.
- """
- pass
-
-
-class ContentTooShortError(YoutubeDLError):
- """Content Too Short exception.
-
- This exception may be raised by FileDownloader objects when a file they
- download is too small for what the server announced first, indicating
- the connection was probably interrupted.
- """
-
- def __init__(self, downloaded, expected):
- super(ContentTooShortError, self).__init__(
- 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
- )
- # Both in bytes
- self.downloaded = downloaded
- self.expected = expected
-
-
-class XAttrMetadataError(YoutubeDLError):
- def __init__(self, code=None, msg='Unknown error'):
- super(XAttrMetadataError, self).__init__(msg)
- self.code = code
- self.msg = msg
-
- # Parsing code and msg
- if (self.code in (errno.ENOSPC, errno.EDQUOT)
- or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
- self.reason = 'NO_SPACE'
- elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
- self.reason = 'VALUE_TOO_LONG'
- else:
- self.reason = 'NOT_SUPPORTED'
-
-
-class XAttrUnavailableError(YoutubeDLError):
- pass
-
-
-def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
- # Work around a Python 2 bug (see http://bugs.python.org/issue17849) by requiring
- # expected HTTP responses to be HTTP/1.0 or later (see also
- # https://github.com/ytdl-org/youtube-dl/issues/6727)
- if sys.version_info < (3, 0):
- kwargs['strict'] = True
- hc = http_class(*args, **compat_kwargs(kwargs))
- source_address = ydl_handler._params.get('source_address')
-
- if source_address is not None:
- # This works around socket's _create_connection(), which will try all
- # address data from getaddrinfo(), including IPv6. This filters the results of
- # getaddrinfo() based on the source_address value.
- # This is based on the cpython socket.create_connection() function.
- # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
- def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
- host, port = address
- err = None
- addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
- af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
- ip_addrs = [addr for addr in addrs if addr[0] == af]
- if addrs and not ip_addrs:
- ip_version = 'v4' if af == socket.AF_INET else 'v6'
- raise socket.error(
- "No remote IP%s addresses available for connect, can't use '%s' as source address"
- % (ip_version, source_address[0]))
- for res in ip_addrs:
- af, socktype, proto, canonname, sa = res
- sock = None
- try:
- sock = socket.socket(af, socktype, proto)
- if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
- sock.settimeout(timeout)
- sock.bind(source_address)
- sock.connect(sa)
- err = None # Explicitly break reference cycle
- return sock
- except socket.error as _:
- err = _
- if sock is not None:
- sock.close()
- if err is not None:
- raise err
- else:
- raise socket.error('getaddrinfo returns an empty list')
- if hasattr(hc, '_create_connection'):
- hc._create_connection = _create_connection
- sa = (source_address, 0)
- if hasattr(hc, 'source_address'): # Python 2.7+
- hc.source_address = sa
- else: # Python 2.6
- def _hc_connect(self, *args, **kwargs):
- sock = _create_connection(
- (self.host, self.port), self.timeout, sa)
- if is_https:
- self.sock = ssl.wrap_socket(
- sock, self.key_file, self.cert_file,
- ssl_version=ssl.PROTOCOL_TLSv1)
- else:
- self.sock = sock
- hc.connect = functools.partial(_hc_connect, hc)
-
- return hc
-
-
-def handle_youtubedl_headers(headers):
- filtered_headers = headers
-
- if 'Youtubedl-no-compression' in filtered_headers:
- filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
- del filtered_headers['Youtubedl-no-compression']
-
- return filtered_headers
-
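The marker header and Accept-Encoding both disappear, leaving compression disabled for that one request:

    >>> from youtube_dl.utils import handle_youtubedl_headers
    >>> handle_youtubedl_headers({'Youtubedl-no-compression': '1',
    ...                           'Accept-Encoding': 'gzip, deflate'})
    {}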
-
-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
- """Handler for HTTP requests and responses.
-
- This class, when installed with an OpenerDirector, automatically adds
- the standard headers to every HTTP request and handles gzipped and
- deflated responses from web servers. If compression is to be avoided in
- a particular request, the original request in the program code only has
- to include the HTTP header "Youtubedl-no-compression", which will be
- removed before making the real request.
-
- Part of this code was copied from:
-
- http://techknack.net/python-urllib2-handlers/
-
- Andrew Rowls, the author of that code, agreed to release it to the
- public domain.
- """
-
- def __init__(self, params, *args, **kwargs):
- compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
- self._params = params
-
- def http_open(self, req):
- conn_class = compat_http_client.HTTPConnection
-
- socks_proxy = req.headers.get('Ytdl-socks-proxy')
- if socks_proxy:
- conn_class = make_socks_conn_class(conn_class, socks_proxy)
- del req.headers['Ytdl-socks-proxy']
-
- return self.do_open(functools.partial(
- _create_http_connection, self, conn_class, False),
- req)
-
- @staticmethod
- def deflate(data):
- try:
- return zlib.decompress(data, -zlib.MAX_WBITS)
- except zlib.error:
- return zlib.decompress(data)
-
- def http_request(self, req):
- # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is
- # not always respected by websites, and some give out URLs with non-percent-encoded
- # non-ASCII characters (see telemb.py, ard.py [#3412])
- # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
- # To work around aforementioned issue we will replace request's original URL with
- # percent-encoded one
- # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
- # the code of this workaround has been moved here from YoutubeDL.urlopen()
- url = req.get_full_url()
- url_escaped = escape_url(url)
-
- # Substitute the URL if it changed after escaping
- if url != url_escaped:
- req = update_Request(req, url=url_escaped)
-
- for h, v in std_headers.items():
- # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
- # (urllib capitalizes the dict keys because of this bug)
- if h.capitalize() not in req.headers:
- req.add_header(h, v)
-
- req.headers = handle_youtubedl_headers(req.headers)
-
- if sys.version_info < (2, 7) and '#' in req.get_full_url():
- # Python 2.6 is brain-dead when it comes to fragments
- req._Request__original = req._Request__original.partition('#')[0]
- req._Request__r_type = req._Request__r_type.partition('#')[0]
-
- return req
-
- def http_response(self, req, resp):
- old_resp = resp
- # gzip
- if resp.headers.get('Content-encoding', '') == 'gzip':
- content = resp.read()
- gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
- try:
- uncompressed = io.BytesIO(gz.read())
- except IOError as original_ioerror:
- # There may be junk at the end of the file
- # See http://stackoverflow.com/q/4928560/35070 for details
- for i in range(1, 1024):
- try:
- gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
- uncompressed = io.BytesIO(gz.read())
- except IOError:
- continue
- break
- else:
- raise original_ioerror
- resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
- resp.msg = old_resp.msg
- del resp.headers['Content-encoding']
- # deflate
- if resp.headers.get('Content-encoding', '') == 'deflate':
- gz = io.BytesIO(self.deflate(resp.read()))
- resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
- resp.msg = old_resp.msg
- del resp.headers['Content-encoding']
- # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
- # https://github.com/ytdl-org/youtube-dl/issues/6457).
- if 300 <= resp.code < 400:
- location = resp.headers.get('Location')
- if location:
- # Per RFC 2616 the default charset is iso-8859-1, which Python 3 respects
- if sys.version_info >= (3, 0):
- location = location.encode('iso-8859-1').decode('utf-8')
- else:
- location = location.decode('utf-8')
- location_escaped = escape_url(location)
- if location != location_escaped:
- del resp.headers['Location']
- if sys.version_info < (3, 0):
- location_escaped = location_escaped.encode('utf-8')
- resp.headers['Location'] = location_escaped
- return resp
-
- https_request = http_request
- https_response = http_response
-
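The deflate helper above accepts both raw and zlib-wrapped streams; with zlib-wrapped input the first (raw) attempt raises zlib.error and the fallback path decompresses it:

    >>> import zlib
    >>> from youtube_dl.utils import YoutubeDLHandler
    >>> YoutubeDLHandler.deflate(zlib.compress(b'data'))
    b'data'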
-
-def make_socks_conn_class(base_class, socks_proxy):
- assert issubclass(base_class, (
- compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
-
- url_components = compat_urlparse.urlparse(socks_proxy)
- if url_components.scheme.lower() == 'socks5':
- socks_type = ProxyType.SOCKS5
- elif url_components.scheme.lower() in ('socks', 'socks4'):
- socks_type = ProxyType.SOCKS4
- elif url_components.scheme.lower() == 'socks4a':
- socks_type = ProxyType.SOCKS4A
-
- def unquote_if_non_empty(s):
- if not s:
- return s
- return compat_urllib_parse_unquote_plus(s)
-
- proxy_args = (
- socks_type,
- url_components.hostname, url_components.port or 1080,
- True, # Remote DNS
- unquote_if_non_empty(url_components.username),
- unquote_if_non_empty(url_components.password),
- )
-
- class SocksConnection(base_class):
- def connect(self):
- self.sock = sockssocket()
- self.sock.setproxy(*proxy_args)
- if type(self.timeout) in (int, float):
- self.sock.settimeout(self.timeout)
- self.sock.connect((self.host, self.port))
-
- if isinstance(self, compat_http_client.HTTPSConnection):
- if hasattr(self, '_context'): # Python > 2.6
- self.sock = self._context.wrap_socket(
- self.sock, server_hostname=self.host)
- else:
- self.sock = ssl.wrap_socket(self.sock)
-
- return SocksConnection
-
-
-class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
- def __init__(self, params, https_conn_class=None, *args, **kwargs):
- compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
- self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
- self._params = params
-
- def https_open(self, req):
- kwargs = {}
- conn_class = self._https_conn_class
-
- if hasattr(self, '_context'): # python > 2.6
- kwargs['context'] = self._context
- if hasattr(self, '_check_hostname'): # python 3.x
- kwargs['check_hostname'] = self._check_hostname
-
- socks_proxy = req.headers.get('Ytdl-socks-proxy')
- if socks_proxy:
- conn_class = make_socks_conn_class(conn_class, socks_proxy)
- del req.headers['Ytdl-socks-proxy']
-
- return self.do_open(functools.partial(
- _create_http_connection, self, conn_class, True),
- req, **kwargs)
-
-
-class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
- _HTTPONLY_PREFIX = '#HttpOnly_'
-
- def save(self, filename=None, ignore_discard=False, ignore_expires=False):
- # Store session cookies with `expires` set to 0 instead of an empty
- # string
- for cookie in self:
- if cookie.expires is None:
- cookie.expires = 0
- compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires)
-
- def load(self, filename=None, ignore_discard=False, ignore_expires=False):
- """Load cookies from a file."""
- if filename is None:
- if self.filename is not None:
- filename = self.filename
- else:
- raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
-
- cf = io.StringIO()
- with open(filename) as f:
- for line in f:
- if line.startswith(self._HTTPONLY_PREFIX):
- line = line[len(self._HTTPONLY_PREFIX):]
- cf.write(compat_str(line))
- cf.seek(0)
- self._really_load(cf, filename, ignore_discard, ignore_expires)
- # Session cookies are denoted by either `expires` field set to
- # an empty string or 0. MozillaCookieJar only recognizes the former
- # (see [1]). So we need to force the latter to be recognized as session
- # cookies on our own.
- # Session cookies may be important for cookies-based authentication,
- # e.g. usually, when a user does not check the 'Remember me' box while
- # logging in on a site, some important cookies are stored as session
- # cookies so that not recognizing them will result in failed login.
- # 1. https://bugs.python.org/issue17164
- for cookie in self:
- # Treat `expires=0` cookies as session cookies
- if cookie.expires == 0:
- cookie.expires = None
- cookie.discard = True
-
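A brief, hedged usage sketch ('cookies.txt' is a placeholder path to an existing Netscape-format cookie file): loading strips the #HttpOnly_ prefixes so those lines parse, and saving with ignore_discard writes session cookies with `expires` set to 0:

    >>> from youtube_dl.utils import YoutubeDLCookieJar
    >>> jar = YoutubeDLCookieJar('cookies.txt')
    >>> jar.load(ignore_discard=True, ignore_expires=True)
    >>> jar.save(ignore_discard=True, ignore_expires=True)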
-
-class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
- def __init__(self, cookiejar=None):
- compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
-
- def http_response(self, request, response):
- # Python 2 will choke on the next HTTP request in a row if there are non-ASCII
- # characters in the Set-Cookie HTTP header of the last response (see
- # https://github.com/ytdl-org/youtube-dl/issues/6769).
- # In order to at least prevent crashing, we percent-encode the Set-Cookie
- # header before HTTPCookieProcessor starts processing it.
- # if sys.version_info < (3, 0) and response.headers:
- # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
- # set_cookie = response.headers.get(set_cookie_header)
- # if set_cookie:
- # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
- # if set_cookie != set_cookie_escaped:
- # del response.headers[set_cookie_header]
- # response.headers[set_cookie_header] = set_cookie_escaped
- return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
-
- https_request = compat_urllib_request.HTTPCookieProcessor.http_request
- https_response = http_response
-
-
-def extract_timezone(date_str):
- m = re.search(
- r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
- date_str)
- if not m:
- timezone = datetime.timedelta()
- else:
- date_str = date_str[:-len(m.group('tz'))]
- if not m.group('sign'):
- timezone = datetime.timedelta()
- else:
- sign = 1 if m.group('sign') == '+' else -1
- timezone = datetime.timedelta(
- hours=sign * int(m.group('hours')),
- minutes=sign * int(m.group('minutes')))
- return timezone, date_str
-
-
-def parse_iso8601(date_str, delimiter='T', timezone=None):
- """ Return a UNIX timestamp from the given date """
-
- if date_str is None:
- return None
-
- date_str = re.sub(r'\.[0-9]+', '', date_str)
-
- if timezone is None:
- timezone, date_str = extract_timezone(date_str)
-
- try:
- date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
- dt = datetime.datetime.strptime(date_str, date_format) - timezone
- return calendar.timegm(dt.timetuple())
- except ValueError:
- pass
-
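Two doctest cases; note how the '+01:00' offset is folded into the returned UNIX timestamp:

    >>> from youtube_dl.utils import parse_iso8601
    >>> parse_iso8601('1970-01-01T00:00:01Z')
    1
    >>> parse_iso8601('1970-01-01T01:00:00+01:00')
    0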
-
-def date_formats(day_first=True):
- return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
-
-
-def unified_strdate(date_str, day_first=True):
- """Return a string with the date in the format YYYYMMDD"""
-
- if date_str is None:
- return None
- upload_date = None
- # Replace commas
- date_str = date_str.replace(',', ' ')
- # Remove AM/PM + timezone
- date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
- _, date_str = extract_timezone(date_str)
-
- for expression in date_formats(day_first):
- try:
- upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
- except ValueError:
- pass
- if upload_date is None:
- timetuple = email.utils.parsedate_tz(date_str)
- if timetuple:
- try:
- upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
- except ValueError:
- pass
- if upload_date is not None:
- return compat_str(upload_date)
-
-
-def unified_timestamp(date_str, day_first=True):
- if date_str is None:
- return None
-
- date_str = re.sub(r'[,|]', '', date_str)
-
- pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
- timezone, date_str = extract_timezone(date_str)
-
- # Remove AM/PM + timezone
- date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
-
- # Remove unrecognized timezones from ISO 8601 alike timestamps
- m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
- if m:
- date_str = date_str[:-len(m.group('tz'))]
-
- # Python only supports microseconds, so remove nanoseconds
- m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
- if m:
- date_str = m.group(1)
-
- for expression in date_formats(day_first):
- try:
- dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
- return calendar.timegm(dt.timetuple())
- except ValueError:
- pass
- timetuple = email.utils.parsedate_tz(date_str)
- if timetuple:
- return calendar.timegm(timetuple) + pm_delta * 3600
-
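Examples of both date normalizers (the second case has no matching format string and falls through to the email.utils.parsedate_tz fallback):

    >>> from youtube_dl.utils import unified_strdate, unified_timestamp
    >>> unified_strdate('December 21, 2014')
    '20141221'
    >>> unified_timestamp('1 January 1970 00:01')
    60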
-
-def determine_ext(url, default_ext='unknown_video'):
- if url is None or '.' not in url:
- return default_ext
- guess = url.partition('?')[0].rpartition('.')[2]
- if re.match(r'^[A-Za-z0-9]+$', guess):
- return guess
- # Try to extract the ext from URLs like http://example.com/foo/bar.mp4/?download
- elif guess.rstrip('/') in KNOWN_EXTENSIONS:
- return guess.rstrip('/')
- else:
- return default_ext
-
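A doctest illustration, including the trailing-slash case mentioned in the comment above:

    >>> from youtube_dl.utils import determine_ext
    >>> determine_ext('http://example.com/video.mp4?download=1')
    'mp4'
    >>> determine_ext('http://example.com/foo/bar.mp4/?download')
    'mp4'
    >>> determine_ext('http://example.com/stream')
    'unknown_video'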
-
-def subtitles_filename(filename, sub_lang, sub_format):
- return filename.rsplit('.', 1)[0] + '.' + sub_lang + '.' + sub_format
-
-
-def date_from_str(date_str):
- """
- Return a date object from a string in the format YYYYMMDD or
- (now|today)[+-][0-9](day|week|month|year)(s)?"""
- today = datetime.date.today()
- if date_str in ('now', 'today'):
- return today
- if date_str == 'yesterday':
- return today - datetime.timedelta(days=1)
- match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
- if match is not None:
- sign = match.group('sign')
- time = int(match.group('time'))
- if sign == '-':
- time = -time
- unit = match.group('unit')
- # A bad approximation?
- if unit == 'month':
- unit = 'day'
- time *= 30
- elif unit == 'year':
- unit = 'day'
- time *= 365
- unit += 's'
- delta = datetime.timedelta(**{unit: time})
- return today + delta
- return datetime.datetime.strptime(date_str, '%Y%m%d').date()
-
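Both accepted forms in a doctest:

    >>> import datetime
    >>> from youtube_dl.utils import date_from_str
    >>> date_from_str('19700101')
    datetime.date(1970, 1, 1)
    >>> date_from_str('now-1week') == datetime.date.today() - datetime.timedelta(days=7)
    True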
-
-def hyphenate_date(date_str):
- """
- Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
- match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
- if match is not None:
- return '-'.join(match.groups())
- else:
- return date_str
-
-
-class DateRange(object):
- """Represents a time interval between two dates"""
-
- def __init__(self, start=None, end=None):
- """start and end must be strings in the format accepted by date"""
- if start is not None:
- self.start = date_from_str(start)
- else:
- self.start = datetime.datetime.min.date()
- if end is not None:
- self.end = date_from_str(end)
- else:
- self.end = datetime.datetime.max.date()
- if self.start > self.end:
- raise ValueError('Date range: "%s", the start date must be before the end date' % self)
-
- @classmethod
- def day(cls, day):
- """Returns a range that only contains the given day"""
- return cls(day, day)
-
- def __contains__(self, date):
- """Check if the date is in the range"""
- if not isinstance(date, datetime.date):
- date = date_from_str(date)
- return self.start <= date <= self.end
-
- def __str__(self):
- return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
-
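A usage example; membership testing accepts the same string format as the constructor:

    >>> from youtube_dl.utils import DateRange
    >>> rng = DateRange('20190101', '20191231')
    >>> '20190615' in rng
    True
    >>> '20200101' in rng
    False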
-
-def platform_name():
- """ Returns the platform name as a compat_str """
- res = platform.platform()
- if isinstance(res, bytes):
- res = res.decode(preferredencoding())
-
- assert isinstance(res, compat_str)
- return res
-
-
-def _windows_write_string(s, out):
- """ Returns True if the string was written using special methods,
- False if it has yet to be written out."""
- # Adapted from http://stackoverflow.com/a/3259271/35070
-
- import ctypes
- import ctypes.wintypes
-
- WIN_OUTPUT_IDS = {
- 1: -11,
- 2: -12,
- }
-
- try:
- fileno = out.fileno()
- except AttributeError:
- # If the output stream doesn't have a fileno, it's virtual
- return False
- except io.UnsupportedOperation:
- # Some strange Windows pseudo files?
- return False
- if fileno not in WIN_OUTPUT_IDS:
- return False
-
- GetStdHandle = compat_ctypes_WINFUNCTYPE(
- ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
- ('GetStdHandle', ctypes.windll.kernel32))
- h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
-
- WriteConsoleW = compat_ctypes_WINFUNCTYPE(
- ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
- ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
- ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
- written = ctypes.wintypes.DWORD(0)
-
- GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
- FILE_TYPE_CHAR = 0x0002
- FILE_TYPE_REMOTE = 0x8000
- GetConsoleMode = compat_ctypes_WINFUNCTYPE(
- ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
- ctypes.POINTER(ctypes.wintypes.DWORD))(
- ('GetConsoleMode', ctypes.windll.kernel32))
- INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
-
- def not_a_console(handle):
- if handle == INVALID_HANDLE_VALUE or handle is None:
- return True
- return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
- or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
-
- if not_a_console(h):
- return False
-
- def next_nonbmp_pos(s):
- try:
- return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
- except StopIteration:
- return len(s)
-
- while s:
- count = min(next_nonbmp_pos(s), 1024)
-
- ret = WriteConsoleW(
- h, s, count if count else 2, ctypes.byref(written), None)
- if ret == 0:
- raise OSError('Failed to write string')
- if not count: # We just wrote a non-BMP character
- assert written.value == 2
- s = s[1:]
- else:
- assert written.value > 0
- s = s[written.value:]
- return True
-
-
-def write_string(s, out=None, encoding=None):
- if out is None:
- out = sys.stderr
- assert type(s) == compat_str
-
- if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
- if _windows_write_string(s, out):
- return
-
- if ('b' in getattr(out, 'mode', '')
- or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
- byt = s.encode(encoding or preferredencoding(), 'ignore')
- out.write(byt)
- elif hasattr(out, 'buffer'):
- enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
- byt = s.encode(enc, 'ignore')
- out.buffer.write(byt)
- else:
- out.write(s)
- out.flush()
-
-
-def bytes_to_intlist(bs):
- if not bs:
- return []
- if isinstance(bs[0], int): # Python 3
- return list(bs)
- else:
- return [ord(c) for c in bs]
-
-
-def intlist_to_bytes(xs):
- if not xs:
- return b''
- return compat_struct_pack('%dB' % len(xs), *xs)
-
-
-# Cross-platform file locking
-if sys.platform == 'win32':
- import ctypes.wintypes
- import msvcrt
-
- class OVERLAPPED(ctypes.Structure):
- _fields_ = [
- ('Internal', ctypes.wintypes.LPVOID),
- ('InternalHigh', ctypes.wintypes.LPVOID),
- ('Offset', ctypes.wintypes.DWORD),
- ('OffsetHigh', ctypes.wintypes.DWORD),
- ('hEvent', ctypes.wintypes.HANDLE),
- ]
-
- kernel32 = ctypes.windll.kernel32
- LockFileEx = kernel32.LockFileEx
- LockFileEx.argtypes = [
- ctypes.wintypes.HANDLE, # hFile
- ctypes.wintypes.DWORD, # dwFlags
- ctypes.wintypes.DWORD, # dwReserved
- ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
- ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
- ctypes.POINTER(OVERLAPPED) # Overlapped
- ]
- LockFileEx.restype = ctypes.wintypes.BOOL
- UnlockFileEx = kernel32.UnlockFileEx
- UnlockFileEx.argtypes = [
- ctypes.wintypes.HANDLE, # hFile
- ctypes.wintypes.DWORD, # dwReserved
- ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
- ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
- ctypes.POINTER(OVERLAPPED) # Overlapped
- ]
- UnlockFileEx.restype = ctypes.wintypes.BOOL
- whole_low = 0xffffffff
- whole_high = 0x7fffffff
-
- def _lock_file(f, exclusive):
- overlapped = OVERLAPPED()
- overlapped.Offset = 0
- overlapped.OffsetHigh = 0
- overlapped.hEvent = 0
- f._lock_file_overlapped_p = ctypes.pointer(overlapped)
- handle = msvcrt.get_osfhandle(f.fileno())
- if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
- whole_low, whole_high, f._lock_file_overlapped_p):
- raise OSError('Locking file failed: %r' % ctypes.FormatError())
-
- def _unlock_file(f):
- assert f._lock_file_overlapped_p
- handle = msvcrt.get_osfhandle(f.fileno())
- if not UnlockFileEx(handle, 0,
- whole_low, whole_high, f._lock_file_overlapped_p):
- raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
-
-else:
- # Some platforms, such as Jython, are missing fcntl
- try:
- import fcntl
-
- def _lock_file(f, exclusive):
- fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
-
- def _unlock_file(f):
- fcntl.flock(f, fcntl.LOCK_UN)
- except ImportError:
- UNSUPPORTED_MSG = 'file locking is not supported on this platform'
-
- def _lock_file(f, exclusive):
- raise IOError(UNSUPPORTED_MSG)
-
- def _unlock_file(f):
- raise IOError(UNSUPPORTED_MSG)
-
-
-class locked_file(object):
- def __init__(self, filename, mode, encoding=None):
- assert mode in ['r', 'a', 'w']
- self.f = io.open(filename, mode, encoding=encoding)
- self.mode = mode
-
- def __enter__(self):
- exclusive = self.mode != 'r'
- try:
- _lock_file(self.f, exclusive)
- except IOError:
- self.f.close()
- raise
- return self
-
- def __exit__(self, etype, value, traceback):
- try:
- _unlock_file(self.f)
- finally:
- self.f.close()
-
- def __iter__(self):
- return iter(self.f)
-
- def write(self, *args):
- return self.f.write(*args)
-
- def read(self, *args):
- return self.f.read(*args)
-
-
-def get_filesystem_encoding():
- encoding = sys.getfilesystemencoding()
- return encoding if encoding is not None else 'utf-8'
-
-
-def shell_quote(args):
- quoted_args = []
- encoding = get_filesystem_encoding()
- for a in args:
- if isinstance(a, bytes):
- # We may get a filename encoded with 'encodeFilename'
- a = a.decode(encoding)
- quoted_args.append(compat_shlex_quote(a))
- return ' '.join(quoted_args)
-
-
-def smuggle_url(url, data):
- """ Pass additional data in a URL for internal use. """
-
- url, idata = unsmuggle_url(url, {})
- data.update(idata)
- sdata = compat_urllib_parse_urlencode(
- {'__youtubedl_smuggle': json.dumps(data)})
- return url + '#' + sdata
-
-
-def unsmuggle_url(smug_url, default=None):
- if '#__youtubedl_smuggle' not in smug_url:
- return smug_url, default
- url, _, sdata = smug_url.rpartition('#')
- jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
- data = json.loads(jsond)
- return url, data
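-
-# Round-trip sketch: the extra dict travels in the URL fragment and comes
-# back unchanged:
-#   >>> url = smuggle_url('http://example.com/v', {'referer': 'http://example.com/'})
-#   >>> unsmuggle_url(url)
-#   ('http://example.com/v', {'referer': 'http://example.com/'})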
-
-
-def format_bytes(bytes):
- if bytes is None:
- return 'N/A'
- if type(bytes) is str:
- bytes = float(bytes)
- if bytes == 0.0:
- exponent = 0
- else:
- exponent = int(math.log(bytes, 1024.0))
- suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
- converted = float(bytes) / float(1024 ** exponent)
- return '%.2f%s' % (converted, suffix)
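-
-# Illustrative values:
-#   >>> format_bytes(1536)
-#   '1.50KiB'
-#   >>> format_bytes(None)
-#   'N/A'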
-
-
-def lookup_unit_table(unit_table, s):
- units_re = '|'.join(re.escape(u) for u in unit_table)
- m = re.match(
- r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
- if not m:
- return None
- num_str = m.group('num').replace(',', '.')
- mult = unit_table[m.group('unit')]
- return int(float(num_str) * mult)
-
-
-def parse_filesize(s):
- if s is None:
- return None
-
- # The lower-case forms are of course incorrect and unofficial,
- # but we support those too
- _UNIT_TABLE = {
- 'B': 1,
- 'b': 1,
- 'bytes': 1,
- 'KiB': 1024,
- 'KB': 1000,
- 'kB': 1024,
- 'Kb': 1000,
- 'kb': 1000,
- 'kilobytes': 1000,
- 'kibibytes': 1024,
- 'MiB': 1024 ** 2,
- 'MB': 1000 ** 2,
- 'mB': 1024 ** 2,
- 'Mb': 1000 ** 2,
- 'mb': 1000 ** 2,
- 'megabytes': 1000 ** 2,
- 'mebibytes': 1024 ** 2,
- 'GiB': 1024 ** 3,
- 'GB': 1000 ** 3,
- 'gB': 1024 ** 3,
- 'Gb': 1000 ** 3,
- 'gb': 1000 ** 3,
- 'gigabytes': 1000 ** 3,
- 'gibibytes': 1024 ** 3,
- 'TiB': 1024 ** 4,
- 'TB': 1000 ** 4,
- 'tB': 1024 ** 4,
- 'Tb': 1000 ** 4,
- 'tb': 1000 ** 4,
- 'terabytes': 1000 ** 4,
- 'tebibytes': 1024 ** 4,
- 'PiB': 1024 ** 5,
- 'PB': 1000 ** 5,
- 'pB': 1024 ** 5,
- 'Pb': 1000 ** 5,
- 'pb': 1000 ** 5,
- 'petabytes': 1000 ** 5,
- 'pebibytes': 1024 ** 5,
- 'EiB': 1024 ** 6,
- 'EB': 1000 ** 6,
- 'eB': 1024 ** 6,
- 'Eb': 1000 ** 6,
- 'eb': 1000 ** 6,
- 'exabytes': 1000 ** 6,
- 'exbibytes': 1024 ** 6,
- 'ZiB': 1024 ** 7,
- 'ZB': 1000 ** 7,
- 'zB': 1024 ** 7,
- 'Zb': 1000 ** 7,
- 'zb': 1000 ** 7,
- 'zettabytes': 1000 ** 7,
- 'zebibytes': 1024 ** 7,
- 'YiB': 1024 ** 8,
- 'YB': 1000 ** 8,
- 'yB': 1024 ** 8,
- 'Yb': 1000 ** 8,
- 'yb': 1000 ** 8,
- 'yottabytes': 1000 ** 8,
- 'yobibytes': 1024 ** 8,
- }
-
- return lookup_unit_table(_UNIT_TABLE, s)
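-
-# Illustrative conversions (decimal vs. binary prefixes; a sketch):
-#   >>> parse_filesize('1.5 MiB')
-#   1572864
-#   >>> parse_filesize('10 MB')
-#   10000000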
-
-
-def parse_count(s):
- if s is None:
- return None
-
- s = s.strip()
-
- if re.match(r'^[\d,.]+$', s):
- return str_to_int(s)
-
- _UNIT_TABLE = {
- 'k': 1000,
- 'K': 1000,
- 'm': 1000 ** 2,
- 'M': 1000 ** 2,
- 'kk': 1000 ** 2,
- 'KK': 1000 ** 2,
- }
-
- return lookup_unit_table(_UNIT_TABLE, s)
-
-
-def parse_resolution(s):
- if s is None:
- return {}
-
- mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
- if mobj:
- return {
- 'width': int(mobj.group('w')),
- 'height': int(mobj.group('h')),
- }
-
- mobj = re.search(r'\b(\d+)[pPiI]\b', s)
- if mobj:
- return {'height': int(mobj.group(1))}
-
- mobj = re.search(r'\b([48])[kK]\b', s)
- if mobj:
- return {'height': int(mobj.group(1)) * 540}
-
- return {}
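-
-# Illustrative parses (a sketch):
-#   >>> parse_resolution('1920x1080')
-#   {'width': 1920, 'height': 1080}
-#   >>> parse_resolution('720p')
-#   {'height': 720}
-#   >>> parse_resolution('4K')
-#   {'height': 2160}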
-
-
-def parse_bitrate(s):
- if not isinstance(s, compat_str):
- return
- mobj = re.search(r'\b(\d+)\s*kbps', s)
- if mobj:
- return int(mobj.group(1))
-
-
-def month_by_name(name, lang='en'):
- """ Return the number of a month by (locale-independently) English name """
-
- month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
-
- try:
- return month_names.index(name) + 1
- except ValueError:
- return None
-
-
-def month_by_abbreviation(abbrev):
- """ Return the number of a month by (locale-independently) English
- abbreviations """
-
- try:
- return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
- except ValueError:
- return None
-
-
-def fix_xml_ampersands(xml_str):
- """Replace all the '&' by '&amp;' in XML"""
- return re.sub(
- r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
- '&amp;',
- xml_str)
-
-
-def setproctitle(title):
- assert isinstance(title, compat_str)
-
- # ctypes in Jython is not complete
- # http://bugs.jython.org/issue2148
- if sys.platform.startswith('java'):
- return
-
- try:
- libc = ctypes.cdll.LoadLibrary('libc.so.6')
- except OSError:
- return
- except TypeError:
- # LoadLibrary in Windows Python 2.7.13 only expects
- # a bytestring, but since unicode_literals turns
- # every string into a unicode string, it fails.
- return
- title_bytes = title.encode('utf-8')
- buf = ctypes.create_string_buffer(len(title_bytes))
- buf.value = title_bytes
- try:
- libc.prctl(15, buf, 0, 0, 0)
- except AttributeError:
- return # Strange libc, just skip this
-
-
-def remove_start(s, start):
- return s[len(start):] if s is not None and s.startswith(start) else s
-
-
-def remove_end(s, end):
- return s[:-len(end)] if s is not None and s.endswith(end) else s
-
-
-def remove_quotes(s):
- if s is None or len(s) < 2:
- return s
- for quote in ('"', "'", ):
- if s[0] == quote and s[-1] == quote:
- return s[1:-1]
- return s
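-
-# Illustrative behaviour of the three strippers above:
-#   >>> remove_start('https://example.com', 'https://')
-#   'example.com'
-#   >>> remove_end('video.mp4', '.mp4')
-#   'video'
-#   >>> remove_quotes('"quoted"')
-#   'quoted'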
-
-
-def url_basename(url):
- path = compat_urlparse.urlparse(url).path
- return path.strip('/').split('/')[-1]
-
-
-def base_url(url):
- return re.match(r'https?://[^?#&]+/', url).group()
-
-
-def urljoin(base, path):
- if isinstance(path, bytes):
- path = path.decode('utf-8')
- if not isinstance(path, compat_str) or not path:
- return None
- if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
- return path
- if isinstance(base, bytes):
- base = base.decode('utf-8')
- if not isinstance(base, compat_str) or not re.match(
- r'^(?:https?:)?//', base):
- return None
- return compat_urlparse.urljoin(base, path)
-
-
-class HEADRequest(compat_urllib_request.Request):
- def get_method(self):
- return 'HEAD'
-
-
-class PUTRequest(compat_urllib_request.Request):
- def get_method(self):
- return 'PUT'
-
-
-def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
- if get_attr:
- if v is not None:
- v = getattr(v, get_attr, None)
- if v == '':
- v = None
- if v is None:
- return default
- try:
- return int(v) * invscale // scale
- except (ValueError, TypeError):
- return default
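-
-# Illustrative coercions (invalid input falls back to the default; a sketch):
-#   >>> int_or_none('42')
-#   42
-#   >>> int_or_none('n/a', default=0)
-#   0
-#   >>> int_or_none(90, scale=60)  # e.g. seconds -> whole minutes
-#   1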
-
-
-def str_or_none(v, default=None):
- return default if v is None else compat_str(v)
-
-
-def str_to_int(int_str):
- """ A more relaxed version of int_or_none """
- if int_str is None:
- return None
- int_str = re.sub(r'[,\.\+]', '', int_str)
- return int(int_str)
-
-
-def float_or_none(v, scale=1, invscale=1, default=None):
- if v is None:
- return default
- try:
- return float(v) * invscale / scale
- except (ValueError, TypeError):
- return default
-
-
-def bool_or_none(v, default=None):
- return v if isinstance(v, bool) else default
-
-
-def strip_or_none(v, default=None):
- return v.strip() if isinstance(v, compat_str) else default
-
-
-def url_or_none(url):
- if not url or not isinstance(url, compat_str):
- return None
- url = url.strip()
- return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
-
-
-def parse_duration(s):
- if not isinstance(s, compat_basestring):
- return None
-
- s = s.strip()
-
- days, hours, mins, secs, ms = [None] * 5
- m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
- if m:
- days, hours, mins, secs, ms = m.groups()
- else:
- m = re.match(
- r'''(?ix)(?:P?
- (?:
- [0-9]+\s*y(?:ears?)?\s*
- )?
- (?:
- [0-9]+\s*m(?:onths?)?\s*
- )?
- (?:
- [0-9]+\s*w(?:eeks?)?\s*
- )?
- (?:
- (?P<days>[0-9]+)\s*d(?:ays?)?\s*
- )?
- T)?
- (?:
- (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
- )?
- (?:
- (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
- )?
- (?:
- (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
- )?Z?$''', s)
- if m:
- days, hours, mins, secs, ms = m.groups()
- else:
- m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
- if m:
- hours, mins = m.groups()
- else:
- return None
-
- duration = 0
- if secs:
- duration += float(secs)
- if mins:
- duration += float(mins) * 60
- if hours:
- duration += float(hours) * 60 * 60
- if days:
- duration += float(days) * 24 * 60 * 60
- if ms:
- duration += float(ms)
- return duration
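-
-# Illustrative parses (the result is always in seconds; a sketch):
-#   >>> parse_duration('1:02:03')
-#   3723.0
-#   >>> parse_duration('3m10s')
-#   190.0
-#   >>> parse_duration('PT1H30M')
-#   5400.0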
-
-
-def prepend_extension(filename, ext, expected_real_ext=None):
- name, real_ext = os.path.splitext(filename)
- return (
- '{0}.{1}{2}'.format(name, ext, real_ext)
- if not expected_real_ext or real_ext[1:] == expected_real_ext
- else '{0}.{1}'.format(filename, ext))
-
-
-def replace_extension(filename, ext, expected_real_ext=None):
- name, real_ext = os.path.splitext(filename)
- return '{0}.{1}'.format(
- name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
- ext)
-
-
-def check_executable(exe, args=[]):
- """ Checks if the given binary is installed somewhere in PATH, and returns its name.
- args can be a list of arguments for a short output (like -version) """
- try:
- subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
- except OSError:
- return False
- return exe
-
-
-def get_exe_version(exe, args=['--version'],
- version_re=None, unrecognized='present'):
- """ Returns the version of the specified executable,
- or False if the executable is not present """
- try:
- # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
- # SIGTTOU if youtube-dl is run in the background.
- # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
- out, _ = subprocess.Popen(
- [encodeArgument(exe)] + args,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
- except OSError:
- return False
- if isinstance(out, bytes): # Python 2.x
- out = out.decode('ascii', 'ignore')
- return detect_exe_version(out, version_re, unrecognized)
-
-
-def detect_exe_version(output, version_re=None, unrecognized='present'):
- assert isinstance(output, compat_str)
- if version_re is None:
- version_re = r'version\s+([-0-9._a-zA-Z]+)'
- m = re.search(version_re, output)
- if m:
- return m.group(1)
- else:
- return unrecognized
-
-
-class PagedList(object):
- def __len__(self):
- # This is only useful for tests
- return len(self.getslice())
-
-
-class OnDemandPagedList(PagedList):
- def __init__(self, pagefunc, pagesize, use_cache=True):
- self._pagefunc = pagefunc
- self._pagesize = pagesize
- self._use_cache = use_cache
- if use_cache:
- self._cache = {}
-
- def getslice(self, start=0, end=None):
- res = []
- for pagenum in itertools.count(start // self._pagesize):
- firstid = pagenum * self._pagesize
- nextfirstid = pagenum * self._pagesize + self._pagesize
- if start >= nextfirstid:
- continue
-
- page_results = None
- if self._use_cache:
- page_results = self._cache.get(pagenum)
- if page_results is None:
- page_results = list(self._pagefunc(pagenum))
- if self._use_cache:
- self._cache[pagenum] = page_results
-
- startv = (
- start % self._pagesize
- if firstid <= start < nextfirstid
- else 0)
-
- endv = (
- ((end - 1) % self._pagesize) + 1
- if (end is not None and firstid <= end <= nextfirstid)
- else None)
-
- if startv != 0 or endv is not None:
- page_results = page_results[startv:endv]
- res.extend(page_results)
-
- # A little optimization - if current page is not "full", ie. does
- # not contain page_size videos then we can assume that this page
- # is the last one - there are no more ids on further pages -
- # i.e. no need to query again.
- if len(page_results) + startv < self._pagesize:
- break
-
- # If we got the whole page, but the next page is not interesting,
- # break out early as well
- if end == nextfirstid:
- break
- return res
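-
-# Illustrative sketch: serve [0..9] three items per page and slice lazily.
-#   >>> pl = OnDemandPagedList(lambda n: range(n * 3, min((n + 1) * 3, 10)), 3)
-#   >>> pl.getslice(2, 7)
-#   [2, 3, 4, 5, 6]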
-
-
-class InAdvancePagedList(PagedList):
- def __init__(self, pagefunc, pagecount, pagesize):
- self._pagefunc = pagefunc
- self._pagecount = pagecount
- self._pagesize = pagesize
-
- def getslice(self, start=0, end=None):
- res = []
- start_page = start // self._pagesize
- end_page = (
- self._pagecount if end is None else (end // self._pagesize + 1))
- skip_elems = start - start_page * self._pagesize
- only_more = None if end is None else end - start
- for pagenum in range(start_page, end_page):
- page = list(self._pagefunc(pagenum))
- if skip_elems:
- page = page[skip_elems:]
- skip_elems = None
- if only_more is not None:
- if len(page) < only_more:
- only_more -= len(page)
- else:
- page = page[:only_more]
- res.extend(page)
- break
- res.extend(page)
- return res
-
-
-def uppercase_escape(s):
- unicode_escape = codecs.getdecoder('unicode_escape')
- return re.sub(
- r'\\U[0-9a-fA-F]{8}',
- lambda m: unicode_escape(m.group(0))[0],
- s)
-
-
-def lowercase_escape(s):
- unicode_escape = codecs.getdecoder('unicode_escape')
- return re.sub(
- r'\\u[0-9a-fA-F]{4}',
- lambda m: unicode_escape(m.group(0))[0],
- s)
-
-
-def escape_rfc3986(s):
- """Escape non-ASCII characters as suggested by RFC 3986"""
- if sys.version_info < (3, 0) and isinstance(s, compat_str):
- s = s.encode('utf-8')
- return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
-
-
-def escape_url(url):
- """Escape URL as suggested by RFC 3986"""
- url_parsed = compat_urllib_parse_urlparse(url)
- return url_parsed._replace(
- netloc=url_parsed.netloc.encode('idna').decode('ascii'),
- path=escape_rfc3986(url_parsed.path),
- params=escape_rfc3986(url_parsed.params),
- query=escape_rfc3986(url_parsed.query),
- fragment=escape_rfc3986(url_parsed.fragment)
- ).geturl()
-
-
-def read_batch_urls(batch_fd):
- def fixup(url):
- if not isinstance(url, compat_str):
- url = url.decode('utf-8', 'replace')
- BOM_UTF8 = '\xef\xbb\xbf'
- if url.startswith(BOM_UTF8):
- url = url[len(BOM_UTF8):]
- url = url.strip()
- if url.startswith(('#', ';', ']')):
- return False
- return url
-
- with contextlib.closing(batch_fd) as fd:
- return [url for url in map(fixup, fd) if url]
-
-
-def urlencode_postdata(*args, **kargs):
- return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
-
-
-def update_url_query(url, query):
- if not query:
- return url
- parsed_url = compat_urlparse.urlparse(url)
- qs = compat_parse_qs(parsed_url.query)
- qs.update(query)
- return compat_urlparse.urlunparse(parsed_url._replace(
- query=compat_urllib_parse_urlencode(qs, True)))
-
-
-def update_Request(req, url=None, data=None, headers={}, query={}):
- req_headers = req.headers.copy()
- req_headers.update(headers)
- req_data = data or req.data
- req_url = update_url_query(url or req.get_full_url(), query)
- req_get_method = req.get_method()
- if req_get_method == 'HEAD':
- req_type = HEADRequest
- elif req_get_method == 'PUT':
- req_type = PUTRequest
- else:
- req_type = compat_urllib_request.Request
- new_req = req_type(
- req_url, data=req_data, headers=req_headers,
- origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
- if hasattr(req, 'timeout'):
- new_req.timeout = req.timeout
- return new_req
-
-
-def _multipart_encode_impl(data, boundary):
- content_type = 'multipart/form-data; boundary=%s' % boundary
-
- out = b''
- for k, v in data.items():
- out += b'--' + boundary.encode('ascii') + b'\r\n'
- if isinstance(k, compat_str):
- k = k.encode('utf-8')
- if isinstance(v, compat_str):
- v = v.encode('utf-8')
- # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
- # suggests sending UTF-8 directly. Firefox sends UTF-8, too
- content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
- if boundary.encode('ascii') in content:
- raise ValueError('Boundary overlaps with data')
- out += content
-
- out += b'--' + boundary.encode('ascii') + b'--\r\n'
-
- return out, content_type
-
-
-def multipart_encode(data, boundary=None):
- '''
- Encode a dict to RFC 7578-compliant form-data
-
- data:
- A dict where keys and values can be either Unicode or bytes-like
- objects.
- boundary:
- If specified a Unicode object, it's used as the boundary. Otherwise
- a random boundary is generated.
-
- Reference: https://tools.ietf.org/html/rfc7578
- '''
- has_specified_boundary = boundary is not None
-
- while True:
- if boundary is None:
- boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
-
- try:
- out, content_type = _multipart_encode_impl(data, boundary)
- break
- except ValueError:
- if has_specified_boundary:
- raise
- boundary = None
-
- return out, content_type
-
-
-def dict_get(d, key_or_keys, default=None, skip_false_values=True):
- if isinstance(key_or_keys, (list, tuple)):
- for key in key_or_keys:
- if key not in d or d[key] is None or skip_false_values and not d[key]:
- continue
- return d[key]
- return default
- return d.get(key_or_keys, default)
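-
-# Illustrative lookup: the first key with a "truthy" value wins (a sketch):
-#   >>> dict_get({'a': None, 'b': '', 'c': 'x'}, ('a', 'b', 'c'))
-#   'x'
-#   >>> dict_get({'b': ''}, ('a', 'b'), skip_false_values=False)
-#   ''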
-
-
-def try_get(src, getter, expected_type=None):
- if not isinstance(getter, (list, tuple)):
- getter = [getter]
- for get in getter:
- try:
- v = get(src)
- except (AttributeError, KeyError, TypeError, IndexError):
- pass
- else:
- if expected_type is None or isinstance(v, expected_type):
- return v
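-
-# Illustrative safe navigation into nested data (a sketch):
-#   >>> try_get({'a': [{'b': 42}]}, lambda x: x['a'][0]['b'], int)
-#   42
-#   >>> try_get({}, lambda x: x['a'][0]['b'])  # missing path -> None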
-
-
-def merge_dicts(*dicts):
- merged = {}
- for a_dict in dicts:
- for k, v in a_dict.items():
- if v is None:
- continue
- if (k not in merged
- or (isinstance(v, compat_str) and v
- and isinstance(merged[k], compat_str)
- and not merged[k])):
- merged[k] = v
- return merged
-
-
-def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
- return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
-
-
-US_RATINGS = {
- 'G': 0,
- 'PG': 10,
- 'PG-13': 13,
- 'R': 16,
- 'NC': 18,
-}
-
-
-TV_PARENTAL_GUIDELINES = {
- 'TV-Y': 0,
- 'TV-Y7': 7,
- 'TV-G': 0,
- 'TV-PG': 0,
- 'TV-14': 14,
- 'TV-MA': 17,
-}
-
-
-def parse_age_limit(s):
- if type(s) == int:
- return s if 0 <= s <= 21 else None
- if not isinstance(s, compat_basestring):
- return None
- m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
- if m:
- return int(m.group('age'))
- if s in US_RATINGS:
- return US_RATINGS[s]
- m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
- if m:
- return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
- return None
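-
-# Illustrative ratings (a sketch):
-#   >>> parse_age_limit('PG-13')
-#   13
-#   >>> parse_age_limit('TV-MA')
-#   17
-#   >>> parse_age_limit('18+')
-#   18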
-
-
-def strip_jsonp(code):
- return re.sub(
- r'''(?sx)^
- (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
- (?:\s*&&\s*(?P=func_name))?
- \s*\(\s*(?P<callback_data>.*)\);?
- \s*?(?://[^\n]*)*$''',
- r'\g<callback_data>', code)
-
-
-def js_to_json(code):
- COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
- SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
- INTEGER_TABLE = (
- (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
- (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
- )
-
- def fix_kv(m):
- v = m.group(0)
- if v in ('true', 'false', 'null'):
- return v
- elif v.startswith('/*') or v.startswith('//') or v == ',':
- return ""
-
- if v[0] in ("'", '"'):
- v = re.sub(r'(?s)\\.|"', lambda m: {
- '"': '\\"',
- "\\'": "'",
- '\\\n': '',
- '\\x': '\\u00',
- }.get(m.group(0), m.group(0)), v[1:-1])
-
- for regex, base in INTEGER_TABLE:
- im = re.match(regex, v)
- if im:
- i = int(im.group(1), base)
- return '"%d":' % i if v.endswith(':') else '%d' % i
-
- return '"%s"' % v
-
- return re.sub(r'''(?sx)
- "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
- '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
- {comment}|,(?={skip}[\]}}])|
- (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
- \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
- [0-9]+(?={skip}:)
- '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
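-
-# Illustrative fix-ups - unquoted keys, single quotes and trailing commas
-# (a sketch; the heavy lifting happens in fix_kv above):
-#   >>> js_to_json("{abc: 1, 'def': 'x',}")
-#   '{"abc": 1, "def": "x"}'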
-
-
-def qualities(quality_ids):
- """ Get a numeric quality value out of a list of possible values """
- def q(qid):
- try:
- return quality_ids.index(qid)
- except ValueError:
- return -1
- return q
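-
-# Illustrative ranking (unknown ids sort below all known ones):
-#   >>> q = qualities(['small', 'medium', 'hd'])
-#   >>> q('hd'), q('small'), q('unknown')
-#   (2, 0, -1)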
-
-
-DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
-
-
-def limit_length(s, length):
- """ Add ellipses to overly long strings """
- if s is None:
- return None
- ELLIPSES = '...'
- if len(s) > length:
- return s[:length - len(ELLIPSES)] + ELLIPSES
- return s
-
-
-def version_tuple(v):
- return tuple(int(e) for e in re.split(r'[-.]', v))
-
-
-def is_outdated_version(version, limit, assume_new=True):
- if not version:
- return not assume_new
- try:
- return version_tuple(version) < version_tuple(limit)
- except ValueError:
- return not assume_new
-
-
-def ytdl_is_updateable():
- """ Returns if youtube-dl can be updated with -U """
- from zipimport import zipimporter
-
- return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
-
-
-def args_to_str(args):
- # Get a short string representation for a subprocess command
- return ' '.join(compat_shlex_quote(a) for a in args)
-
-
-def error_to_compat_str(err):
- err_str = str(err)
- # On Python 2 the error byte string must be decoded with the proper
- # encoding rather than ascii
- if sys.version_info[0] < 3:
- err_str = err_str.decode(preferredencoding())
- return err_str
-
-
-def mimetype2ext(mt):
- if mt is None:
- return None
-
- ext = {
- 'audio/mp4': 'm4a',
- # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Use .mp3 here as
- # it's the most popular one
- 'audio/mpeg': 'mp3',
- }.get(mt)
- if ext is not None:
- return ext
-
- _, _, res = mt.rpartition('/')
- res = res.split(';')[0].strip().lower()
-
- return {
- '3gpp': '3gp',
- 'smptett+xml': 'tt',
- 'ttaf+xml': 'dfxp',
- 'ttml+xml': 'ttml',
- 'x-flv': 'flv',
- 'x-mp4-fragmented': 'mp4',
- 'x-ms-sami': 'sami',
- 'x-ms-wmv': 'wmv',
- 'mpegurl': 'm3u8',
- 'x-mpegurl': 'm3u8',
- 'vnd.apple.mpegurl': 'm3u8',
- 'dash+xml': 'mpd',
- 'f4m+xml': 'f4m',
- 'hds+xml': 'f4m',
- 'vnd.ms-sstr+xml': 'ism',
- 'quicktime': 'mov',
- 'mp2t': 'ts',
- }.get(res, res)
-
-
-def parse_codecs(codecs_str):
- # http://tools.ietf.org/html/rfc6381
- if not codecs_str:
- return {}
- split_codecs = list(filter(None, map(
- lambda s: s.strip(), codecs_str.strip().strip(',').split(','))))
- vcodec, acodec = None, None
- for full_codec in split_codecs:
- codec = full_codec.split('.')[0]
- if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
- if not vcodec:
- vcodec = full_codec
- elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
- if not acodec:
- acodec = full_codec
- else:
- write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
- if not vcodec and not acodec:
- if len(split_codecs) == 2:
- return {
- 'vcodec': split_codecs[0],
- 'acodec': split_codecs[1],
- }
- else:
- return {
- 'vcodec': vcodec or 'none',
- 'acodec': acodec or 'none',
- }
- return {}
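-
-# Illustrative split of an RFC 6381 codecs string (a sketch):
-#   >>> parse_codecs('avc1.64001f, mp4a.40.2')
-#   {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}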
-
-
-def urlhandle_detect_ext(url_handle):
- getheader = url_handle.headers.get
-
- cd = getheader('Content-Disposition')
- if cd:
- m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
- if m:
- e = determine_ext(m.group('filename'), default_ext=None)
- if e:
- return e
-
- return mimetype2ext(getheader('Content-Type'))
-
-
-def encode_data_uri(data, mime_type):
- return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
-
-
-def age_restricted(content_limit, age_limit):
- """ Returns True iff the content should be blocked """
-
- if age_limit is None: # No limit set
- return False
- if content_limit is None:
- return False # Content available for everyone
- return age_limit < content_limit
-
-
-def is_html(first_bytes):
- """ Detect whether a file contains HTML by examining its first bytes. """
-
- BOMS = [
- (b'\xef\xbb\xbf', 'utf-8'),
- (b'\x00\x00\xfe\xff', 'utf-32-be'),
- (b'\xff\xfe\x00\x00', 'utf-32-le'),
- (b'\xff\xfe', 'utf-16-le'),
- (b'\xfe\xff', 'utf-16-be'),
- ]
- for bom, enc in BOMS:
- if first_bytes.startswith(bom):
- s = first_bytes[len(bom):].decode(enc, 'replace')
- break
- else:
- s = first_bytes.decode('utf-8', 'replace')
-
- return re.match(r'^\s*<', s)
-
-
-def determine_protocol(info_dict):
- protocol = info_dict.get('protocol')
- if protocol is not None:
- return protocol
-
- url = info_dict['url']
- if url.startswith('rtmp'):
- return 'rtmp'
- elif url.startswith('mms'):
- return 'mms'
- elif url.startswith('rtsp'):
- return 'rtsp'
-
- ext = determine_ext(url)
- if ext == 'm3u8':
- return 'm3u8'
- elif ext == 'f4m':
- return 'f4m'
-
- return compat_urllib_parse_urlparse(url).scheme
-
-
-def render_table(header_row, data):
- """ Render a list of rows, each as a list of values """
- table = [header_row] + data
- max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
- format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
- return '\n'.join(format_str % tuple(row) for row in table)
-
-
-def _match_one(filter_part, dct):
- COMPARISON_OPERATORS = {
- '<': operator.lt,
- '<=': operator.le,
- '>': operator.gt,
- '>=': operator.ge,
- '=': operator.eq,
- '!=': operator.ne,
- }
- operator_rex = re.compile(r'''(?x)\s*
- (?P<key>[a-z_]+)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
- (?:
- (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
- (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
- (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
- )
- \s*$
- ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
- m = operator_rex.search(filter_part)
- if m:
- op = COMPARISON_OPERATORS[m.group('op')]
- actual_value = dct.get(m.group('key'))
- if (m.group('quotedstrval') is not None
- or m.group('strval') is not None
- # If the original field is a string and the matching comparison value is
- # a number we should respect the origin of the original field
- # and process comparison value as a string (see
- # https://github.com/ytdl-org/youtube-dl/issues/11082).
- or actual_value is not None and m.group('intval') is not None
- and isinstance(actual_value, compat_str)):
- if m.group('op') not in ('=', '!='):
- raise ValueError(
- 'Operator %s does not support string values!' % m.group('op'))
- comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
- quote = m.group('quote')
- if quote is not None:
- comparison_value = comparison_value.replace(r'\%s' % quote, quote)
- else:
- try:
- comparison_value = int(m.group('intval'))
- except ValueError:
- comparison_value = parse_filesize(m.group('intval'))
- if comparison_value is None:
- comparison_value = parse_filesize(m.group('intval') + 'B')
- if comparison_value is None:
- raise ValueError(
- 'Invalid integer value %r in filter part %r' % (
- m.group('intval'), filter_part))
- if actual_value is None:
- return m.group('none_inclusive')
- return op(actual_value, comparison_value)
-
- UNARY_OPERATORS = {
- '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
- '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
- }
- operator_rex = re.compile(r'''(?x)\s*
- (?P<op>%s)\s*(?P<key>[a-z_]+)
- \s*$
- ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
- m = operator_rex.search(filter_part)
- if m:
- op = UNARY_OPERATORS[m.group('op')]
- actual_value = dct.get(m.group('key'))
- return op(actual_value)
-
- raise ValueError('Invalid filter part %r' % filter_part)
-
-
-def match_str(filter_str, dct):
- """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
-
- return all(
- _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
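-
-# Illustrative filters (a sketch of the --match-filter syntax):
-#   >>> match_str('duration > 60 & uploader = foo', {'duration': 90, 'uploader': 'foo'})
-#   True
-#   >>> match_str('!is_live', {'is_live': True})
-#   False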
-
-
-def match_filter_func(filter_str):
- def _match_func(info_dict):
- if match_str(filter_str, info_dict):
- return None
- else:
- video_title = info_dict.get('title', info_dict.get('id', 'video'))
- return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
- return _match_func
-
-
-def parse_dfxp_time_expr(time_expr):
- if not time_expr:
- return
-
- mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
- if mobj:
- return float(mobj.group('time_offset'))
-
- mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
- if mobj:
- return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
-
-
-def srt_subtitles_timecode(seconds):
- return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
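-
-# Illustrative timecode:
-#   >>> srt_subtitles_timecode(3723.5)
-#   '01:02:03,500'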
-
-
-def dfxp2srt(dfxp_data):
- '''
- @param dfxp_data A bytes-like object containing DFXP data
- @returns A unicode object containing converted SRT data
- '''
- LEGACY_NAMESPACES = (
- (b'http://www.w3.org/ns/ttml', [
- b'http://www.w3.org/2004/11/ttaf1',
- b'http://www.w3.org/2006/04/ttaf1',
- b'http://www.w3.org/2006/10/ttaf1',
- ]),
- (b'http://www.w3.org/ns/ttml#styling', [
- b'http://www.w3.org/ns/ttml#style',
- ]),
- )
-
- SUPPORTED_STYLING = [
- 'color',
- 'fontFamily',
- 'fontSize',
- 'fontStyle',
- 'fontWeight',
- 'textDecoration'
- ]
-
- _x = functools.partial(xpath_with_ns, ns_map={
- 'xml': 'http://www.w3.org/XML/1998/namespace',
- 'ttml': 'http://www.w3.org/ns/ttml',
- 'tts': 'http://www.w3.org/ns/ttml#styling',
- })
-
- styles = {}
- default_style = {}
-
- class TTMLPElementParser(object):
- _out = ''
- _unclosed_elements = []
- _applied_styles = []
-
- def start(self, tag, attrib):
- if tag in (_x('ttml:br'), 'br'):
- self._out += '\n'
- else:
- unclosed_elements = []
- style = {}
- element_style_id = attrib.get('style')
- if default_style:
- style.update(default_style)
- if element_style_id:
- style.update(styles.get(element_style_id, {}))
- for prop in SUPPORTED_STYLING:
- prop_val = attrib.get(_x('tts:' + prop))
- if prop_val:
- style[prop] = prop_val
- if style:
- font = ''
- for k, v in sorted(style.items()):
- if self._applied_styles and self._applied_styles[-1].get(k) == v:
- continue
- if k == 'color':
- font += ' color="%s"' % v
- elif k == 'fontSize':
- font += ' size="%s"' % v
- elif k == 'fontFamily':
- font += ' face="%s"' % v
- elif k == 'fontWeight' and v == 'bold':
- self._out += '<b>'
- unclosed_elements.append('b')
- elif k == 'fontStyle' and v == 'italic':
- self._out += '<i>'
- unclosed_elements.append('i')
- elif k == 'textDecoration' and v == 'underline':
- self._out += '<u>'
- unclosed_elements.append('u')
- if font:
- self._out += '<font' + font + '>'
- unclosed_elements.append('font')
- applied_style = {}
- if self._applied_styles:
- applied_style.update(self._applied_styles[-1])
- applied_style.update(style)
- self._applied_styles.append(applied_style)
- self._unclosed_elements.append(unclosed_elements)
-
- def end(self, tag):
- if tag not in (_x('ttml:br'), 'br'):
- unclosed_elements = self._unclosed_elements.pop()
- for element in reversed(unclosed_elements):
- self._out += '</%s>' % element
- if unclosed_elements and self._applied_styles:
- self._applied_styles.pop()
-
- def data(self, data):
- self._out += data
-
- def close(self):
- return self._out.strip()
-
- def parse_node(node):
- target = TTMLPElementParser()
- parser = xml.etree.ElementTree.XMLParser(target=target)
- parser.feed(xml.etree.ElementTree.tostring(node))
- return parser.close()
-
- for k, v in LEGACY_NAMESPACES:
- for ns in v:
- dfxp_data = dfxp_data.replace(ns, k)
-
- dfxp = compat_etree_fromstring(dfxp_data)
- out = []
- paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
-
- if not paras:
- raise ValueError('Invalid dfxp/TTML subtitle')
-
- repeat = False
- while True:
- for style in dfxp.findall(_x('.//ttml:style')):
- style_id = style.get('id') or style.get(_x('xml:id'))
- if not style_id:
- continue
- parent_style_id = style.get('style')
- if parent_style_id:
- if parent_style_id not in styles:
- repeat = True
- continue
- styles[style_id] = styles[parent_style_id].copy()
- for prop in SUPPORTED_STYLING:
- prop_val = style.get(_x('tts:' + prop))
- if prop_val:
- styles.setdefault(style_id, {})[prop] = prop_val
- if repeat:
- repeat = False
- else:
- break
-
- for p in ('body', 'div'):
- ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
- if ele is None:
- continue
- style = styles.get(ele.get('style'))
- if not style:
- continue
- default_style.update(style)
-
- for para, index in zip(paras, itertools.count(1)):
- begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
- end_time = parse_dfxp_time_expr(para.attrib.get('end'))
- dur = parse_dfxp_time_expr(para.attrib.get('dur'))
- if begin_time is None:
- continue
- if not end_time:
- if not dur:
- continue
- end_time = begin_time + dur
- out.append('%d\n%s --> %s\n%s\n\n' % (
- index,
- srt_subtitles_timecode(begin_time),
- srt_subtitles_timecode(end_time),
- parse_node(para)))
-
- return ''.join(out)
-
-
-def cli_option(params, command_option, param):
- param = params.get(param)
- if param:
- param = compat_str(param)
- return [command_option, param] if param is not None else []
-
-
-def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
- param = params.get(param)
- if param is None:
- return []
- assert isinstance(param, bool)
- if separator:
- return [command_option + separator + (true_value if param else false_value)]
- return [command_option, true_value if param else false_value]
-
-
-def cli_valueless_option(params, command_option, param, expected_value=True):
- param = params.get(param)
- return [command_option] if param == expected_value else []
-
-
-def cli_configuration_args(params, param, default=[]):
- ex_args = params.get(param)
- if ex_args is None:
- return default
- assert isinstance(ex_args, list)
- return ex_args
-
-
-class ISO639Utils(object):
- # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
- _lang_map = {
- 'aa': 'aar',
- 'ab': 'abk',
- 'ae': 'ave',
- 'af': 'afr',
- 'ak': 'aka',
- 'am': 'amh',
- 'an': 'arg',
- 'ar': 'ara',
- 'as': 'asm',
- 'av': 'ava',
- 'ay': 'aym',
- 'az': 'aze',
- 'ba': 'bak',
- 'be': 'bel',
- 'bg': 'bul',
- 'bh': 'bih',
- 'bi': 'bis',
- 'bm': 'bam',
- 'bn': 'ben',
- 'bo': 'bod',
- 'br': 'bre',
- 'bs': 'bos',
- 'ca': 'cat',
- 'ce': 'che',
- 'ch': 'cha',
- 'co': 'cos',
- 'cr': 'cre',
- 'cs': 'ces',
- 'cu': 'chu',
- 'cv': 'chv',
- 'cy': 'cym',
- 'da': 'dan',
- 'de': 'deu',
- 'dv': 'div',
- 'dz': 'dzo',
- 'ee': 'ewe',
- 'el': 'ell',
- 'en': 'eng',
- 'eo': 'epo',
- 'es': 'spa',
- 'et': 'est',
- 'eu': 'eus',
- 'fa': 'fas',
- 'ff': 'ful',
- 'fi': 'fin',
- 'fj': 'fij',
- 'fo': 'fao',
- 'fr': 'fra',
- 'fy': 'fry',
- 'ga': 'gle',
- 'gd': 'gla',
- 'gl': 'glg',
- 'gn': 'grn',
- 'gu': 'guj',
- 'gv': 'glv',
- 'ha': 'hau',
- 'he': 'heb',
- 'iw': 'heb', # Replaced by he in 1989 revision
- 'hi': 'hin',
- 'ho': 'hmo',
- 'hr': 'hrv',
- 'ht': 'hat',
- 'hu': 'hun',
- 'hy': 'hye',
- 'hz': 'her',
- 'ia': 'ina',
- 'id': 'ind',
- 'in': 'ind', # Replaced by id in 1989 revision
- 'ie': 'ile',
- 'ig': 'ibo',
- 'ii': 'iii',
- 'ik': 'ipk',
- 'io': 'ido',
- 'is': 'isl',
- 'it': 'ita',
- 'iu': 'iku',
- 'ja': 'jpn',
- 'jv': 'jav',
- 'ka': 'kat',
- 'kg': 'kon',
- 'ki': 'kik',
- 'kj': 'kua',
- 'kk': 'kaz',
- 'kl': 'kal',
- 'km': 'khm',
- 'kn': 'kan',
- 'ko': 'kor',
- 'kr': 'kau',
- 'ks': 'kas',
- 'ku': 'kur',
- 'kv': 'kom',
- 'kw': 'cor',
- 'ky': 'kir',
- 'la': 'lat',
- 'lb': 'ltz',
- 'lg': 'lug',
- 'li': 'lim',
- 'ln': 'lin',
- 'lo': 'lao',
- 'lt': 'lit',
- 'lu': 'lub',
- 'lv': 'lav',
- 'mg': 'mlg',
- 'mh': 'mah',
- 'mi': 'mri',
- 'mk': 'mkd',
- 'ml': 'mal',
- 'mn': 'mon',
- 'mr': 'mar',
- 'ms': 'msa',
- 'mt': 'mlt',
- 'my': 'mya',
- 'na': 'nau',
- 'nb': 'nob',
- 'nd': 'nde',
- 'ne': 'nep',
- 'ng': 'ndo',
- 'nl': 'nld',
- 'nn': 'nno',
- 'no': 'nor',
- 'nr': 'nbl',
- 'nv': 'nav',
- 'ny': 'nya',
- 'oc': 'oci',
- 'oj': 'oji',
- 'om': 'orm',
- 'or': 'ori',
- 'os': 'oss',
- 'pa': 'pan',
- 'pi': 'pli',
- 'pl': 'pol',
- 'ps': 'pus',
- 'pt': 'por',
- 'qu': 'que',
- 'rm': 'roh',
- 'rn': 'run',
- 'ro': 'ron',
- 'ru': 'rus',
- 'rw': 'kin',
- 'sa': 'san',
- 'sc': 'srd',
- 'sd': 'snd',
- 'se': 'sme',
- 'sg': 'sag',
- 'si': 'sin',
- 'sk': 'slk',
- 'sl': 'slv',
- 'sm': 'smo',
- 'sn': 'sna',
- 'so': 'som',
- 'sq': 'sqi',
- 'sr': 'srp',
- 'ss': 'ssw',
- 'st': 'sot',
- 'su': 'sun',
- 'sv': 'swe',
- 'sw': 'swa',
- 'ta': 'tam',
- 'te': 'tel',
- 'tg': 'tgk',
- 'th': 'tha',
- 'ti': 'tir',
- 'tk': 'tuk',
- 'tl': 'tgl',
- 'tn': 'tsn',
- 'to': 'ton',
- 'tr': 'tur',
- 'ts': 'tso',
- 'tt': 'tat',
- 'tw': 'twi',
- 'ty': 'tah',
- 'ug': 'uig',
- 'uk': 'ukr',
- 'ur': 'urd',
- 'uz': 'uzb',
- 've': 'ven',
- 'vi': 'vie',
- 'vo': 'vol',
- 'wa': 'wln',
- 'wo': 'wol',
- 'xh': 'xho',
- 'yi': 'yid',
- 'ji': 'yid', # Replaced by yi in 1989 revision
- 'yo': 'yor',
- 'za': 'zha',
- 'zh': 'zho',
- 'zu': 'zul',
- }
-
- @classmethod
- def short2long(cls, code):
- """Convert language code from ISO 639-1 to ISO 639-2/T"""
- return cls._lang_map.get(code[:2])
-
- @classmethod
- def long2short(cls, code):
- """Convert language code from ISO 639-2/T to ISO 639-1"""
- for short_name, long_name in cls._lang_map.items():
- if long_name == code:
- return short_name
-
-
-class ISO3166Utils(object):
- # From http://data.okfn.org/data/core/country-list
- _country_map = {
- 'AF': 'Afghanistan',
- 'AX': 'Åland Islands',
- 'AL': 'Albania',
- 'DZ': 'Algeria',
- 'AS': 'American Samoa',
- 'AD': 'Andorra',
- 'AO': 'Angola',
- 'AI': 'Anguilla',
- 'AQ': 'Antarctica',
- 'AG': 'Antigua and Barbuda',
- 'AR': 'Argentina',
- 'AM': 'Armenia',
- 'AW': 'Aruba',
- 'AU': 'Australia',
- 'AT': 'Austria',
- 'AZ': 'Azerbaijan',
- 'BS': 'Bahamas',
- 'BH': 'Bahrain',
- 'BD': 'Bangladesh',
- 'BB': 'Barbados',
- 'BY': 'Belarus',
- 'BE': 'Belgium',
- 'BZ': 'Belize',
- 'BJ': 'Benin',
- 'BM': 'Bermuda',
- 'BT': 'Bhutan',
- 'BO': 'Bolivia, Plurinational State of',
- 'BQ': 'Bonaire, Sint Eustatius and Saba',
- 'BA': 'Bosnia and Herzegovina',
- 'BW': 'Botswana',
- 'BV': 'Bouvet Island',
- 'BR': 'Brazil',
- 'IO': 'British Indian Ocean Territory',
- 'BN': 'Brunei Darussalam',
- 'BG': 'Bulgaria',
- 'BF': 'Burkina Faso',
- 'BI': 'Burundi',
- 'KH': 'Cambodia',
- 'CM': 'Cameroon',
- 'CA': 'Canada',
- 'CV': 'Cape Verde',
- 'KY': 'Cayman Islands',
- 'CF': 'Central African Republic',
- 'TD': 'Chad',
- 'CL': 'Chile',
- 'CN': 'China',
- 'CX': 'Christmas Island',
- 'CC': 'Cocos (Keeling) Islands',
- 'CO': 'Colombia',
- 'KM': 'Comoros',
- 'CG': 'Congo',
- 'CD': 'Congo, the Democratic Republic of the',
- 'CK': 'Cook Islands',
- 'CR': 'Costa Rica',
- 'CI': 'Côte d\'Ivoire',
- 'HR': 'Croatia',
- 'CU': 'Cuba',
- 'CW': 'Curaçao',
- 'CY': 'Cyprus',
- 'CZ': 'Czech Republic',
- 'DK': 'Denmark',
- 'DJ': 'Djibouti',
- 'DM': 'Dominica',
- 'DO': 'Dominican Republic',
- 'EC': 'Ecuador',
- 'EG': 'Egypt',
- 'SV': 'El Salvador',
- 'GQ': 'Equatorial Guinea',
- 'ER': 'Eritrea',
- 'EE': 'Estonia',
- 'ET': 'Ethiopia',
- 'FK': 'Falkland Islands (Malvinas)',
- 'FO': 'Faroe Islands',
- 'FJ': 'Fiji',
- 'FI': 'Finland',
- 'FR': 'France',
- 'GF': 'French Guiana',
- 'PF': 'French Polynesia',
- 'TF': 'French Southern Territories',
- 'GA': 'Gabon',
- 'GM': 'Gambia',
- 'GE': 'Georgia',
- 'DE': 'Germany',
- 'GH': 'Ghana',
- 'GI': 'Gibraltar',
- 'GR': 'Greece',
- 'GL': 'Greenland',
- 'GD': 'Grenada',
- 'GP': 'Guadeloupe',
- 'GU': 'Guam',
- 'GT': 'Guatemala',
- 'GG': 'Guernsey',
- 'GN': 'Guinea',
- 'GW': 'Guinea-Bissau',
- 'GY': 'Guyana',
- 'HT': 'Haiti',
- 'HM': 'Heard Island and McDonald Islands',
- 'VA': 'Holy See (Vatican City State)',
- 'HN': 'Honduras',
- 'HK': 'Hong Kong',
- 'HU': 'Hungary',
- 'IS': 'Iceland',
- 'IN': 'India',
- 'ID': 'Indonesia',
- 'IR': 'Iran, Islamic Republic of',
- 'IQ': 'Iraq',
- 'IE': 'Ireland',
- 'IM': 'Isle of Man',
- 'IL': 'Israel',
- 'IT': 'Italy',
- 'JM': 'Jamaica',
- 'JP': 'Japan',
- 'JE': 'Jersey',
- 'JO': 'Jordan',
- 'KZ': 'Kazakhstan',
- 'KE': 'Kenya',
- 'KI': 'Kiribati',
- 'KP': 'Korea, Democratic People\'s Republic of',
- 'KR': 'Korea, Republic of',
- 'KW': 'Kuwait',
- 'KG': 'Kyrgyzstan',
- 'LA': 'Lao People\'s Democratic Republic',
- 'LV': 'Latvia',
- 'LB': 'Lebanon',
- 'LS': 'Lesotho',
- 'LR': 'Liberia',
- 'LY': 'Libya',
- 'LI': 'Liechtenstein',
- 'LT': 'Lithuania',
- 'LU': 'Luxembourg',
- 'MO': 'Macao',
- 'MK': 'Macedonia, the Former Yugoslav Republic of',
- 'MG': 'Madagascar',
- 'MW': 'Malawi',
- 'MY': 'Malaysia',
- 'MV': 'Maldives',
- 'ML': 'Mali',
- 'MT': 'Malta',
- 'MH': 'Marshall Islands',
- 'MQ': 'Martinique',
- 'MR': 'Mauritania',
- 'MU': 'Mauritius',
- 'YT': 'Mayotte',
- 'MX': 'Mexico',
- 'FM': 'Micronesia, Federated States of',
- 'MD': 'Moldova, Republic of',
- 'MC': 'Monaco',
- 'MN': 'Mongolia',
- 'ME': 'Montenegro',
- 'MS': 'Montserrat',
- 'MA': 'Morocco',
- 'MZ': 'Mozambique',
- 'MM': 'Myanmar',
- 'NA': 'Namibia',
- 'NR': 'Nauru',
- 'NP': 'Nepal',
- 'NL': 'Netherlands',
- 'NC': 'New Caledonia',
- 'NZ': 'New Zealand',
- 'NI': 'Nicaragua',
- 'NE': 'Niger',
- 'NG': 'Nigeria',
- 'NU': 'Niue',
- 'NF': 'Norfolk Island',
- 'MP': 'Northern Mariana Islands',
- 'NO': 'Norway',
- 'OM': 'Oman',
- 'PK': 'Pakistan',
- 'PW': 'Palau',
- 'PS': 'Palestine, State of',
- 'PA': 'Panama',
- 'PG': 'Papua New Guinea',
- 'PY': 'Paraguay',
- 'PE': 'Peru',
- 'PH': 'Philippines',
- 'PN': 'Pitcairn',
- 'PL': 'Poland',
- 'PT': 'Portugal',
- 'PR': 'Puerto Rico',
- 'QA': 'Qatar',
- 'RE': 'Réunion',
- 'RO': 'Romania',
- 'RU': 'Russian Federation',
- 'RW': 'Rwanda',
- 'BL': 'Saint Barthélemy',
- 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
- 'KN': 'Saint Kitts and Nevis',
- 'LC': 'Saint Lucia',
- 'MF': 'Saint Martin (French part)',
- 'PM': 'Saint Pierre and Miquelon',
- 'VC': 'Saint Vincent and the Grenadines',
- 'WS': 'Samoa',
- 'SM': 'San Marino',
- 'ST': 'Sao Tome and Principe',
- 'SA': 'Saudi Arabia',
- 'SN': 'Senegal',
- 'RS': 'Serbia',
- 'SC': 'Seychelles',
- 'SL': 'Sierra Leone',
- 'SG': 'Singapore',
- 'SX': 'Sint Maarten (Dutch part)',
- 'SK': 'Slovakia',
- 'SI': 'Slovenia',
- 'SB': 'Solomon Islands',
- 'SO': 'Somalia',
- 'ZA': 'South Africa',
- 'GS': 'South Georgia and the South Sandwich Islands',
- 'SS': 'South Sudan',
- 'ES': 'Spain',
- 'LK': 'Sri Lanka',
- 'SD': 'Sudan',
- 'SR': 'Suriname',
- 'SJ': 'Svalbard and Jan Mayen',
- 'SZ': 'Swaziland',
- 'SE': 'Sweden',
- 'CH': 'Switzerland',
- 'SY': 'Syrian Arab Republic',
- 'TW': 'Taiwan, Province of China',
- 'TJ': 'Tajikistan',
- 'TZ': 'Tanzania, United Republic of',
- 'TH': 'Thailand',
- 'TL': 'Timor-Leste',
- 'TG': 'Togo',
- 'TK': 'Tokelau',
- 'TO': 'Tonga',
- 'TT': 'Trinidad and Tobago',
- 'TN': 'Tunisia',
- 'TR': 'Turkey',
- 'TM': 'Turkmenistan',
- 'TC': 'Turks and Caicos Islands',
- 'TV': 'Tuvalu',
- 'UG': 'Uganda',
- 'UA': 'Ukraine',
- 'AE': 'United Arab Emirates',
- 'GB': 'United Kingdom',
- 'US': 'United States',
- 'UM': 'United States Minor Outlying Islands',
- 'UY': 'Uruguay',
- 'UZ': 'Uzbekistan',
- 'VU': 'Vanuatu',
- 'VE': 'Venezuela, Bolivarian Republic of',
- 'VN': 'Viet Nam',
- 'VG': 'Virgin Islands, British',
- 'VI': 'Virgin Islands, U.S.',
- 'WF': 'Wallis and Futuna',
- 'EH': 'Western Sahara',
- 'YE': 'Yemen',
- 'ZM': 'Zambia',
- 'ZW': 'Zimbabwe',
- }
-
- @classmethod
- def short2full(cls, code):
- """Convert an ISO 3166-2 country code to the corresponding full name"""
- return cls._country_map.get(code.upper())
-
-
-class GeoUtils(object):
- # Major IPv4 address blocks per country
- _country_ip_map = {
- 'AD': '85.94.160.0/19',
- 'AE': '94.200.0.0/13',
- 'AF': '149.54.0.0/17',
- 'AG': '209.59.64.0/18',
- 'AI': '204.14.248.0/21',
- 'AL': '46.99.0.0/16',
- 'AM': '46.70.0.0/15',
- 'AO': '105.168.0.0/13',
- 'AP': '159.117.192.0/21',
- 'AR': '181.0.0.0/12',
- 'AS': '202.70.112.0/20',
- 'AT': '84.112.0.0/13',
- 'AU': '1.128.0.0/11',
- 'AW': '181.41.0.0/18',
- 'AZ': '5.191.0.0/16',
- 'BA': '31.176.128.0/17',
- 'BB': '65.48.128.0/17',
- 'BD': '114.130.0.0/16',
- 'BE': '57.0.0.0/8',
- 'BF': '129.45.128.0/17',
- 'BG': '95.42.0.0/15',
- 'BH': '37.131.0.0/17',
- 'BI': '154.117.192.0/18',
- 'BJ': '137.255.0.0/16',
- 'BL': '192.131.134.0/24',
- 'BM': '196.12.64.0/18',
- 'BN': '156.31.0.0/16',
- 'BO': '161.56.0.0/16',
- 'BQ': '161.0.80.0/20',
- 'BR': '152.240.0.0/12',
- 'BS': '24.51.64.0/18',
- 'BT': '119.2.96.0/19',
- 'BW': '168.167.0.0/16',
- 'BY': '178.120.0.0/13',
- 'BZ': '179.42.192.0/18',
- 'CA': '99.224.0.0/11',
- 'CD': '41.243.0.0/16',
- 'CF': '196.32.200.0/21',
- 'CG': '197.214.128.0/17',
- 'CH': '85.0.0.0/13',
- 'CI': '154.232.0.0/14',
- 'CK': '202.65.32.0/19',
- 'CL': '152.172.0.0/14',
- 'CM': '165.210.0.0/15',
- 'CN': '36.128.0.0/10',
- 'CO': '181.240.0.0/12',
- 'CR': '201.192.0.0/12',
- 'CU': '152.206.0.0/15',
- 'CV': '165.90.96.0/19',
- 'CW': '190.88.128.0/17',
- 'CY': '46.198.0.0/15',
- 'CZ': '88.100.0.0/14',
- 'DE': '53.0.0.0/8',
- 'DJ': '197.241.0.0/17',
- 'DK': '87.48.0.0/12',
- 'DM': '192.243.48.0/20',
- 'DO': '152.166.0.0/15',
- 'DZ': '41.96.0.0/12',
- 'EC': '186.68.0.0/15',
- 'EE': '90.190.0.0/15',
- 'EG': '156.160.0.0/11',
- 'ER': '196.200.96.0/20',
- 'ES': '88.0.0.0/11',
- 'ET': '196.188.0.0/14',
- 'EU': '2.16.0.0/13',
- 'FI': '91.152.0.0/13',
- 'FJ': '144.120.0.0/16',
- 'FM': '119.252.112.0/20',
- 'FO': '88.85.32.0/19',
- 'FR': '90.0.0.0/9',
- 'GA': '41.158.0.0/15',
- 'GB': '25.0.0.0/8',
- 'GD': '74.122.88.0/21',
- 'GE': '31.146.0.0/16',
- 'GF': '161.22.64.0/18',
- 'GG': '62.68.160.0/19',
- 'GH': '45.208.0.0/14',
- 'GI': '85.115.128.0/19',
- 'GL': '88.83.0.0/19',
- 'GM': '160.182.0.0/15',
- 'GN': '197.149.192.0/18',
- 'GP': '104.250.0.0/19',
- 'GQ': '105.235.224.0/20',
- 'GR': '94.64.0.0/13',
- 'GT': '168.234.0.0/16',
- 'GU': '168.123.0.0/16',
- 'GW': '197.214.80.0/20',
- 'GY': '181.41.64.0/18',
- 'HK': '113.252.0.0/14',
- 'HN': '181.210.0.0/16',
- 'HR': '93.136.0.0/13',
- 'HT': '148.102.128.0/17',
- 'HU': '84.0.0.0/14',
- 'ID': '39.192.0.0/10',
- 'IE': '87.32.0.0/12',
- 'IL': '79.176.0.0/13',
- 'IM': '5.62.80.0/20',
- 'IN': '117.192.0.0/10',
- 'IO': '203.83.48.0/21',
- 'IQ': '37.236.0.0/14',
- 'IR': '2.176.0.0/12',
- 'IS': '82.221.0.0/16',
- 'IT': '79.0.0.0/10',
- 'JE': '87.244.64.0/18',
- 'JM': '72.27.0.0/17',
- 'JO': '176.29.0.0/16',
- 'JP': '126.0.0.0/8',
- 'KE': '105.48.0.0/12',
- 'KG': '158.181.128.0/17',
- 'KH': '36.37.128.0/17',
- 'KI': '103.25.140.0/22',
- 'KM': '197.255.224.0/20',
- 'KN': '198.32.32.0/19',
- 'KP': '175.45.176.0/22',
- 'KR': '175.192.0.0/10',
- 'KW': '37.36.0.0/14',
- 'KY': '64.96.0.0/15',
- 'KZ': '2.72.0.0/13',
- 'LA': '115.84.64.0/18',
- 'LB': '178.135.0.0/16',
- 'LC': '192.147.231.0/24',
- 'LI': '82.117.0.0/19',
- 'LK': '112.134.0.0/15',
- 'LR': '41.86.0.0/19',
- 'LS': '129.232.0.0/17',
- 'LT': '78.56.0.0/13',
- 'LU': '188.42.0.0/16',
- 'LV': '46.109.0.0/16',
- 'LY': '41.252.0.0/14',
- 'MA': '105.128.0.0/11',
- 'MC': '88.209.64.0/18',
- 'MD': '37.246.0.0/16',
- 'ME': '178.175.0.0/17',
- 'MF': '74.112.232.0/21',
- 'MG': '154.126.0.0/17',
- 'MH': '117.103.88.0/21',
- 'MK': '77.28.0.0/15',
- 'ML': '154.118.128.0/18',
- 'MM': '37.111.0.0/17',
- 'MN': '49.0.128.0/17',
- 'MO': '60.246.0.0/16',
- 'MP': '202.88.64.0/20',
- 'MQ': '109.203.224.0/19',
- 'MR': '41.188.64.0/18',
- 'MS': '208.90.112.0/22',
- 'MT': '46.11.0.0/16',
- 'MU': '105.16.0.0/12',
- 'MV': '27.114.128.0/18',
- 'MW': '105.234.0.0/16',
- 'MX': '187.192.0.0/11',
- 'MY': '175.136.0.0/13',
- 'MZ': '197.218.0.0/15',
- 'NA': '41.182.0.0/16',
- 'NC': '101.101.0.0/18',
- 'NE': '197.214.0.0/18',
- 'NF': '203.17.240.0/22',
- 'NG': '105.112.0.0/12',
- 'NI': '186.76.0.0/15',
- 'NL': '145.96.0.0/11',
- 'NO': '84.208.0.0/13',
- 'NP': '36.252.0.0/15',
- 'NR': '203.98.224.0/19',
- 'NU': '49.156.48.0/22',
- 'NZ': '49.224.0.0/14',
- 'OM': '5.36.0.0/15',
- 'PA': '186.72.0.0/15',
- 'PE': '186.160.0.0/14',
- 'PF': '123.50.64.0/18',
- 'PG': '124.240.192.0/19',
- 'PH': '49.144.0.0/13',
- 'PK': '39.32.0.0/11',
- 'PL': '83.0.0.0/11',
- 'PM': '70.36.0.0/20',
- 'PR': '66.50.0.0/16',
- 'PS': '188.161.0.0/16',
- 'PT': '85.240.0.0/13',
- 'PW': '202.124.224.0/20',
- 'PY': '181.120.0.0/14',
- 'QA': '37.210.0.0/15',
- 'RE': '139.26.0.0/16',
- 'RO': '79.112.0.0/13',
- 'RS': '178.220.0.0/14',
- 'RU': '5.136.0.0/13',
- 'RW': '105.178.0.0/15',
- 'SA': '188.48.0.0/13',
- 'SB': '202.1.160.0/19',
- 'SC': '154.192.0.0/11',
- 'SD': '154.96.0.0/13',
- 'SE': '78.64.0.0/12',
- 'SG': '152.56.0.0/14',
- 'SI': '188.196.0.0/14',
- 'SK': '78.98.0.0/15',
- 'SL': '197.215.0.0/17',
- 'SM': '89.186.32.0/19',
- 'SN': '41.82.0.0/15',
- 'SO': '197.220.64.0/19',
- 'SR': '186.179.128.0/17',
- 'SS': '105.235.208.0/21',
- 'ST': '197.159.160.0/19',
- 'SV': '168.243.0.0/16',
- 'SX': '190.102.0.0/20',
- 'SY': '5.0.0.0/16',
- 'SZ': '41.84.224.0/19',
- 'TC': '65.255.48.0/20',
- 'TD': '154.68.128.0/19',
- 'TG': '196.168.0.0/14',
- 'TH': '171.96.0.0/13',
- 'TJ': '85.9.128.0/18',
- 'TK': '27.96.24.0/21',
- 'TL': '180.189.160.0/20',
- 'TM': '95.85.96.0/19',
- 'TN': '197.0.0.0/11',
- 'TO': '175.176.144.0/21',
- 'TR': '78.160.0.0/11',
- 'TT': '186.44.0.0/15',
- 'TV': '202.2.96.0/19',
- 'TW': '120.96.0.0/11',
- 'TZ': '156.156.0.0/14',
- 'UA': '93.72.0.0/13',
- 'UG': '154.224.0.0/13',
- 'US': '3.0.0.0/8',
- 'UY': '167.56.0.0/13',
- 'UZ': '82.215.64.0/18',
- 'VA': '212.77.0.0/19',
- 'VC': '24.92.144.0/20',
- 'VE': '186.88.0.0/13',
- 'VG': '172.103.64.0/18',
- 'VI': '146.226.0.0/16',
- 'VN': '14.160.0.0/11',
- 'VU': '202.80.32.0/20',
- 'WF': '117.20.32.0/21',
- 'WS': '202.4.32.0/19',
- 'YE': '134.35.0.0/16',
- 'YT': '41.242.116.0/22',
- 'ZA': '41.0.0.0/11',
- 'ZM': '165.56.0.0/13',
- 'ZW': '41.85.192.0/19',
- }
-
- @classmethod
- def random_ipv4(cls, code_or_block):
- if len(code_or_block) == 2:
- block = cls._country_ip_map.get(code_or_block.upper())
- if not block:
- return None
- else:
- block = code_or_block
- addr, preflen = block.split('/')
- addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
- addr_max = addr_min | (0xffffffff >> int(preflen))
- return compat_str(socket.inet_ntoa(
- compat_struct_pack('!L', random.randint(addr_min, addr_max))))
-
-
-class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
- def __init__(self, proxies=None):
- # Set default handlers
- for type in ('http', 'https'):
- setattr(self, '%s_open' % type,
- lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
- meth(r, proxy, type))
- compat_urllib_request.ProxyHandler.__init__(self, proxies)
-
- def proxy_open(self, req, proxy, type):
- req_proxy = req.headers.get('Ytdl-request-proxy')
- if req_proxy is not None:
- proxy = req_proxy
- del req.headers['Ytdl-request-proxy']
-
- if proxy == '__noproxy__':
- return None # No Proxy
- if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
- req.add_header('Ytdl-socks-proxy', proxy)
- # youtube-dl's http/https handlers take care of wrapping the socket with socks
- return None
- return compat_urllib_request.ProxyHandler.proxy_open(
- self, req, proxy, type)
-
-
-# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
-# released into Public Domain
-# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
-
-def long_to_bytes(n, blocksize=0):
- """long_to_bytes(n:long, blocksize:int) : string
- Convert a long integer to a byte string.
-
- If optional blocksize is given and greater than zero, pad the front of the
- byte string with binary zeros so that the length is a multiple of
- blocksize.
- """
- # after much testing, this algorithm was deemed to be the fastest
- s = b''
- n = int(n)
- while n > 0:
- s = compat_struct_pack('>I', n & 0xffffffff) + s
- n = n >> 32
- # strip off leading zeros
- for i in range(len(s)):
- if s[i] != b'\000'[0]:
- break
- else:
- # only happens when n == 0
- s = b'\000'
- i = 0
- s = s[i:]
- # add back some pad bytes. this could be done more efficiently w.r.t. the
- # de-padding being done above, but sigh...
- if blocksize > 0 and len(s) % blocksize:
- s = (blocksize - len(s) % blocksize) * b'\000' + s
- return s
-
-
-def bytes_to_long(s):
- """bytes_to_long(string) : long
- Convert a byte string to a long integer.
-
- This is (essentially) the inverse of long_to_bytes().
- """
- acc = 0
- length = len(s)
- if length % 4:
- extra = (4 - length % 4)
- s = b'\000' * extra + s
- length = length + extra
- for i in range(0, length, 4):
- acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
- return acc
-
-
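A quick round-trip check of the two helpers above (hypothetical usage, not part of the module):

    # bytes_to_long is the inverse of long_to_bytes.
    assert bytes_to_long(long_to_bytes(0xdeadbeef)) == 0xdeadbeef
    # blocksize pads the front with NUL bytes to a multiple of the block size.
    assert long_to_bytes(1, blocksize=4) == b'\x00\x00\x00\x01'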
-def ohdave_rsa_encrypt(data, exponent, modulus):
- '''
- Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
-
- Input:
- data: data to encrypt, bytes-like object
- exponent, modulus: parameter e and N of RSA algorithm, both integer
- Output: hex string of encrypted data
-
- Limitation: supports one block encryption only
- '''
-
- payload = int(binascii.hexlify(data[::-1]), 16)
- encrypted = pow(payload, exponent, modulus)
- return '%x' % encrypted
-
-
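A toy invocation of ohdave_rsa_encrypt (the key below is the classic textbook RSA example, not anything a real site uses). Note the byte reversal: the input is interpreted as a little-endian integer.

    e, n = 17, 3233  # textbook toy key: n = 61 * 53
    # b'\x01\x00' reversed is b'\x00\x01', i.e. the integer 1, and
    # pow(1, e, n) == 1, so the hex result is '1'.
    assert ohdave_rsa_encrypt(b'\x01\x00', e, n) == '1'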
-def pkcs1pad(data, length):
- """
- Padding input data with PKCS#1 scheme
-
- @param {int[]} data input data
- @param {int} length target length
- @returns {int[]} padded data
- """
- if len(data) > length - 11:
- raise ValueError('Input data too long for PKCS#1 padding')
-
- pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
- return [0, 2] + pseudo_random + [0] + data
-
-
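The block layout pkcs1pad produces is [0x00, 0x02, filler, 0x00, data], padded to the requested length (note that randint(0, 254) above can emit zero filler bytes, which strict PKCS#1 v1.5 forbids). A small check:

    padded = pkcs1pad([0x41, 0x42], 16)
    assert len(padded) == 16
    assert padded[:2] == [0, 2]            # block type 2 header
    assert padded[-3:] == [0, 0x41, 0x42]  # zero separator, then the data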
-def encode_base_n(num, n, table=None):
- FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
- if not table:
- table = FULL_TABLE[:n]
-
- if n > len(table):
- raise ValueError('base %d exceeds table length %d' % (n, len(table)))
-
- if num == 0:
- return table[0]
-
- ret = ''
- while num:
- ret = table[num % n] + ret
- num = num // n
- return ret
-
-
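encode_base_n with the default table behaves like ordinary base-16/36/62 encoding, for example:

    assert encode_base_n(255, 16) == 'ff'
    assert encode_base_n(0, 2) == '0'
    assert encode_base_n(61, 62) == 'Z'  # last symbol of the 62-char table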
-def decode_packed_codes(code):
- mobj = re.search(PACKED_CODES_RE, code)
- obfuscated_code, base, count, symbols = mobj.groups()
- base = int(base)
- count = int(count)
- symbols = symbols.split('|')
- symbol_table = {}
-
- while count:
- count -= 1
- base_n_count = encode_base_n(count, base)
- symbol_table[base_n_count] = symbols[count] or base_n_count
-
- return re.sub(
- r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
- obfuscated_code)
-
-
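The core of decode_packed_codes is the symbol table: each word in the packed source, read as a base-`base` number, indexes into the symbol list. A hand-rolled illustration (assuming encode_base_n above and the stdlib re module):

    import re

    symbols = 'alert|hello'.split('|')
    table = {encode_base_n(i, 36): symbols[i] or encode_base_n(i, 36)
             for i in range(len(symbols))}
    unpacked = re.sub(r'\b(\w+)\b',
                      lambda m: table.get(m.group(0), m.group(0)),
                      '0("1")')
    assert unpacked == 'alert("hello")'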
-def parse_m3u8_attributes(attrib):
- info = {}
- for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
- if val.startswith('"'):
- val = val[1:-1]
- info[key] = val
- return info
-
-
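Example with a typical HLS master-playlist attribute line:

    attrs = parse_m3u8_attributes(
        'BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"')
    assert attrs == {'BANDWIDTH': '1280000',
                     'CODECS': 'avc1.4d401f,mp4a.40.2'}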
-def urshift(val, n):
- return val >> n if val >= 0 else (val + 0x100000000) >> n
-
-
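urshift emulates JavaScript's unsigned right shift (>>>) for 32-bit values that Python holds as negative integers:

    assert urshift(-1, 0) == 0xffffffff  # JS: -1 >>> 0
    assert urshift(-8, 2) == 0x3ffffffe  # JS: -8 >>> 2
    assert urshift(16, 2) == 4           # non-negative values shift normally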
-# Based on png2str() written by @gdkchan and improved by @yokrysty
-# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
-def decode_png(png_data):
- # Reference: https://www.w3.org/TR/PNG/
- header = png_data[8:]
-
- if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
- raise IOError('Not a valid PNG file.')
-
- int_map = {1: '>B', 2: '>H', 4: '>I'}
- unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
-
- chunks = []
-
- while header:
- length = unpack_integer(header[:4])
- header = header[4:]
-
- chunk_type = header[:4]
- header = header[4:]
-
- chunk_data = header[:length]
- header = header[length:]
-
- header = header[4:] # Skip CRC
-
- chunks.append({
- 'type': chunk_type,
- 'length': length,
- 'data': chunk_data
- })
-
- ihdr = chunks[0]['data']
-
- width = unpack_integer(ihdr[:4])
- height = unpack_integer(ihdr[4:8])
-
- idat = b''
-
- for chunk in chunks:
- if chunk['type'] == b'IDAT':
- idat += chunk['data']
-
- if not idat:
- raise IOError('Unable to read PNG data.')
-
- decompressed_data = bytearray(zlib.decompress(idat))
-
- stride = width * 3
- pixels = []
-
- def _get_pixel(idx):
- x = idx % stride
- y = idx // stride
- return pixels[y][x]
-
- for y in range(height):
- basePos = y * (1 + stride)
- filter_type = decompressed_data[basePos]
-
- current_row = []
-
- pixels.append(current_row)
-
- for x in range(stride):
- color = decompressed_data[1 + basePos + x]
- basex = y * stride + x
- left = 0
- up = 0
-
- if x > 2:
- left = _get_pixel(basex - 3)
- if y > 0:
- up = _get_pixel(basex - stride)
-
- if filter_type == 1: # Sub
- color = (color + left) & 0xff
- elif filter_type == 2: # Up
- color = (color + up) & 0xff
- elif filter_type == 3: # Average
- color = (color + ((left + up) >> 1)) & 0xff
- elif filter_type == 4: # Paeth
- a = left
- b = up
- c = 0
-
- if x > 2 and y > 0:
- c = _get_pixel(basex - stride - 3)
-
- p = a + b - c
-
- pa = abs(p - a)
- pb = abs(p - b)
- pc = abs(p - c)
-
- if pa <= pb and pa <= pc:
- color = (color + a) & 0xff
- elif pb <= pc:
- color = (color + b) & 0xff
- else:
- color = (color + c) & 0xff
-
- current_row.append(color)
-
- return width, height, pixels
-
-
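Hypothetical usage (the file name is illustrative): decode_png returns the raw RGB scanlines after undoing the PNG per-row filters, which some extractors use to recover data shipped inside images:

    with open('frame.png', 'rb') as f:  # hypothetical input file
        width, height, pixels = decode_png(f.read())
    # Each row holds width * 3 bytes (R, G, B per pixel).
    r, g, b = pixels[0][0], pixels[0][1], pixels[0][2]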
-def write_xattr(path, key, value):
- # This mess below finds the best xattr tool for the job
- try:
- # try the pyxattr module...
- import xattr
-
- if hasattr(xattr, 'set'): # pyxattr
- # Unicode arguments are not supported in python-pyxattr until
- # version 0.5.0
- # See https://github.com/ytdl-org/youtube-dl/issues/5498
- pyxattr_required_version = '0.5.0'
- if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
- # TODO: fallback to CLI tools
- raise XAttrUnavailableError(
- 'python-pyxattr is detected but is too old. '
- 'youtube-dl requires %s or above while your version is %s. '
- 'Falling back to other xattr implementations' % (
- pyxattr_required_version, xattr.__version__))
-
- setxattr = xattr.set
- else: # xattr
- setxattr = xattr.setxattr
-
- try:
- setxattr(path, key, value)
- except EnvironmentError as e:
- raise XAttrMetadataError(e.errno, e.strerror)
-
- except ImportError:
- if compat_os_name == 'nt':
- # Write xattrs to NTFS Alternate Data Streams:
- # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
- assert ':' not in key
- assert os.path.exists(path)
-
- ads_fn = path + ':' + key
- try:
- with open(ads_fn, 'wb') as f:
- f.write(value)
- except EnvironmentError as e:
- raise XAttrMetadataError(e.errno, e.strerror)
- else:
- user_has_setfattr = check_executable('setfattr', ['--version'])
- user_has_xattr = check_executable('xattr', ['-h'])
-
- if user_has_setfattr or user_has_xattr:
-
- value = value.decode('utf-8')
- if user_has_setfattr:
- executable = 'setfattr'
- opts = ['-n', key, '-v', value]
- elif user_has_xattr:
- executable = 'xattr'
- opts = ['-w', key, value]
-
- cmd = ([encodeFilename(executable, True)]
- + [encodeArgument(o) for o in opts]
- + [encodeFilename(path, True)])
-
- try:
- p = subprocess.Popen(
- cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
- except EnvironmentError as e:
- raise XAttrMetadataError(e.errno, e.strerror)
- stdout, stderr = p.communicate()
- stderr = stderr.decode('utf-8', 'replace')
- if p.returncode != 0:
- raise XAttrMetadataError(p.returncode, stderr)
-
- else:
- # On Unix, but unable to find pyxattr, setfattr, or xattr.
- if sys.platform.startswith('linux'):
- raise XAttrUnavailableError(
- "Couldn't find a tool to set the xattrs. "
- "Install either the python 'pyxattr' or 'xattr' "
- "modules, or the GNU 'attr' package "
- "(which contains the 'setfattr' tool).")
- else:
- raise XAttrUnavailableError(
- "Couldn't find a tool to set the xattrs. "
- "Install either the python 'xattr' module, "
- "or the 'xattr' binary.")
-
-
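A hedged usage sketch (the attribute name is illustrative; on Linux, user-writable attributes must live in the "user." namespace):

    try:
        write_xattr('video.mp4', 'user.xdg.origin.url',
                    b'https://example.com/source')
    except XAttrUnavailableError as e:
        print('no xattr backend available: %s' % e)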
-def random_birthday(year_field, month_field, day_field):
- start_date = datetime.date(1950, 1, 1)
- end_date = datetime.date(1995, 12, 31)
- offset = random.randint(0, (end_date - start_date).days)
- random_date = start_date + datetime.timedelta(offset)
- return {
- year_field: str(random_date.year),
- month_field: str(random_date.month),
- day_field: str(random_date.day),
- }
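For example, to fill age-gate sign-up fields with a plausible date of birth (field names are whatever the target form expects):

    print(random_birthday('birth_year', 'birth_month', 'birth_day'))
    # e.g. {'birth_year': '1983', 'birth_month': '7', 'birth_day': '19'}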
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
deleted file mode 100644
index c3eafb068..000000000
--- a/youtube_dl/version.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from __future__ import unicode_literals
-
-__version__ = '2019.09.28'
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py
new file mode 100644
index 000000000..f79d31deb
--- /dev/null
+++ b/youtube_dlc/YoutubeDL.py
@@ -0,0 +1,2417 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import absolute_import, unicode_literals
+
+import collections
+import contextlib
+import copy
+import datetime
+import errno
+import fileinput
+import io
+import itertools
+import json
+import locale
+import operator
+import os
+import platform
+import re
+import shutil
+import subprocess
+import socket
+import sys
+import time
+import tokenize
+import traceback
+import random
+
+from string import ascii_letters
+
+from .compat import (
+ compat_basestring,
+ compat_cookiejar,
+ compat_get_terminal_size,
+ compat_http_client,
+ compat_kwargs,
+ compat_numeric_types,
+ compat_os_name,
+ compat_str,
+ compat_tokenize_tokenize,
+ compat_urllib_error,
+ compat_urllib_request,
+ compat_urllib_request_DataHandler,
+)
+from .utils import (
+ age_restricted,
+ args_to_str,
+ ContentTooShortError,
+ date_from_str,
+ DateRange,
+ DEFAULT_OUTTMPL,
+ determine_ext,
+ determine_protocol,
+ DownloadError,
+ encode_compat_str,
+ encodeFilename,
+ error_to_compat_str,
+ expand_path,
+ ExtractorError,
+ format_bytes,
+ formatSeconds,
+ GeoRestrictedError,
+ int_or_none,
+ ISO3166Utils,
+ locked_file,
+ make_HTTPS_handler,
+ MaxDownloadsReached,
+ orderedSet,
+ PagedList,
+ parse_filesize,
+ PerRequestProxyHandler,
+ platform_name,
+ PostProcessingError,
+ preferredencoding,
+ prepend_extension,
+ register_socks_protocols,
+ render_table,
+ replace_extension,
+ SameFileError,
+ sanitize_filename,
+ sanitize_path,
+ sanitize_url,
+ sanitized_Request,
+ std_headers,
+ str_or_none,
+ subtitles_filename,
+ UnavailableVideoError,
+ url_basename,
+ version_tuple,
+ write_json_file,
+ write_string,
+ YoutubeDLCookieJar,
+ YoutubeDLCookieProcessor,
+ YoutubeDLHandler,
+ YoutubeDLRedirectHandler,
+)
+from .cache import Cache
+from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor.openload import PhantomJSwrapper
+from .downloader import get_suitable_downloader
+from .downloader.rtmp import rtmpdump_version
+from .postprocessor import (
+ FFmpegFixupM3u8PP,
+ FFmpegFixupM4aPP,
+ FFmpegFixupStretchedPP,
+ FFmpegMergerPP,
+ FFmpegPostProcessor,
+ get_postprocessor,
+)
+from .version import __version__
+
+if compat_os_name == 'nt':
+ import ctypes
+
+
+class YoutubeDL(object):
+ """YoutubeDL class.
+
+ YoutubeDL objects are the ones responsible of downloading the
+ actual video file and writing it to disk if the user has requested
+ it, among some other tasks. In most cases there should be one per
+ program. Given a video URL, the downloader doesn't know how to
+ extract all the needed information itself (that is the task of the
+ InfoExtractors), so it has to pass the URL to one of them.
+
+ For this, YoutubeDL objects have a method that allows
+ InfoExtractors to be registered in a given order. When it is passed
+ a URL, the YoutubeDL object hands it to the first InfoExtractor it
+ finds that reports being able to handle it. The InfoExtractor extracts
+ all the information about the video or videos the URL refers to, and
+ YoutubeDL processes the extracted information, possibly using a File
+ Downloader to download the video.
+
+ YoutubeDL objects accept a lot of parameters. In order not to saturate
+ the object constructor with arguments, it receives a dictionary of
+ options instead. These options are available through the params
+ attribute for the InfoExtractors to use. The YoutubeDL also
+ registers itself as the downloader in charge of the InfoExtractors
+ that are added to it, so this is a "mutual registration".
+
+ Available options:
+
+ username: Username for authentication purposes.
+ password: Password for authentication purposes.
+ videopassword: Password for accessing a video.
+ ap_mso: Adobe Pass multiple-system operator identifier.
+ ap_username: Multiple-system operator account username.
+ ap_password: Multiple-system operator account password.
+ usenetrc: Use netrc for authentication instead.
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ no_warnings: Do not print out anything for warnings.
+ forceurl: Force printing final URL.
+ forcetitle: Force printing title.
+ forceid: Force printing ID.
+ forcethumbnail: Force printing thumbnail URL.
+ forcedescription: Force printing description.
+ forcefilename: Force printing final filename.
+ forceduration: Force printing duration.
+ forcejson: Force printing info_dict as JSON.
+ dump_single_json: Force printing the info_dict of the whole playlist
+ (or video) as a single JSON line.
+ simulate: Do not download the video files.
+ format: Video format code. See options.py for more information.
+ outtmpl: Template for output names.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ ignoreerrors: Do not stop on download errors.
+ force_generic_extractor: Force downloader to use the generic extractor
+ nooverwrites: Prevent overwriting files.
+ playliststart: Playlist item to start at.
+ playlistend: Playlist item to end at.
+ playlist_items: Specific indices of playlist to download.
+ playlistreverse: Download playlist items in reverse order.
+ playlistrandom: Download playlist items in random order.
+ matchtitle: Download only matching titles.
+ rejecttitle: Reject downloads for matching titles.
+ logger: Log messages to a logging.Logger instance.
+ logtostderr: Log messages to stderr instead of stdout.
+ writedescription: Write the video description to a .description file
+ writeinfojson: Write the video description to a .info.json file
+ writeannotations: Write the video annotations to a .annotations.xml file
+ writethumbnail: Write the thumbnail image to a file
+ write_all_thumbnails: Write all thumbnail formats to files
+ writesubtitles: Write the video subtitles to a file
+ writeautomaticsub: Write the automatically generated subtitles to a file
+ allsubtitles: Downloads all the subtitles of the video
+ (requires writesubtitles or writeautomaticsub)
+ listsubtitles: Lists all available subtitles for the video
+ subtitlesformat: The format code for subtitles
+ subtitleslangs: List of languages of the subtitles to download
+ keepvideo: Keep the video file after post-processing
+ daterange: A DateRange object, download only if the upload_date is in the range.
+ skip_download: Skip the actual download of the video file
+ cachedir: Location of the cache files in the filesystem.
+ False to disable filesystem cache.
+ noplaylist: Download single video instead of a playlist if in doubt.
+ age_limit: An integer representing the user's age in years.
+ Unsuitable videos for the given age are skipped.
+ min_views: An integer representing the minimum view count the video
+ must have in order to not be skipped.
+ Videos without view count information are always
+ downloaded. None for no limit.
+ max_views: An integer representing the maximum view count.
+ Videos that are more popular than that are not
+ downloaded.
+ Videos without view count information are always
+ downloaded. None for no limit.
+ download_archive: File name of a file where all downloads are recorded.
+ Videos already present in the file are not downloaded
+ again.
+ cookiefile: File name where cookies should be read from and dumped to.
+ nocheckcertificate:Do not verify SSL certificates
+ prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
+ At the moment, this is only supported by YouTube.
+ proxy: URL of the proxy server to use
+ geo_verification_proxy: URL of the proxy to use for IP address verification
+ on geo-restricted sites.
+ socket_timeout: Time to wait for unresponsive hosts, in seconds
+ bidi_workaround: Work around buggy terminals without bidirectional text
+ support, using fribidi
+ debug_printtraffic:Print out sent and received HTTP traffic
+ include_ads: Download ads as well
+ default_search: Prepend this string if an input url is not valid.
+ 'auto' for elaborate guessing
+ encoding: Use this encoding instead of the system-specified one.
+ extract_flat: Do not resolve URLs, return the immediate result.
+ Pass in 'in_playlist' to only show this behavior for
+ playlist items.
+ postprocessors: A list of dictionaries, each with an entry
+ * key: The name of the postprocessor. See
+ youtube_dlc/postprocessor/__init__.py for a list.
+ as well as any further keyword arguments for the
+ postprocessor.
+ progress_hooks: A list of functions that get called on download
+ progress, with a dictionary with the entries
+ * status: One of "downloading", "error", or "finished".
+ Check this first and ignore unknown values.
+
+ If status is one of "downloading", or "finished", the
+ following properties may also be present:
+ * filename: The final filename (always present)
+ * tmpfilename: The filename we're currently writing to
+ * downloaded_bytes: Bytes on disk
+ * total_bytes: Size of the whole file, None if unknown
+ * total_bytes_estimate: Guess of the eventual file size,
+ None if unavailable.
+ * elapsed: The number of seconds since download started.
+ * eta: The estimated time in seconds, None if unknown
+ * speed: The download speed in bytes/second, None if
+ unknown
+ * fragment_index: The counter of the currently
+ downloaded video fragment.
+ * fragment_count: The number of fragments (= individual
+ files that will be merged)
+
+ Progress hooks are guaranteed to be called at least once
+ (with status "finished") if the download is successful.
+ merge_output_format: Extension to use when merging formats.
+ fixup: Automatically correct known faults of the file.
+ One of:
+ - "never": do nothing
+ - "warn": only emit a warning
+ - "detect_or_warn": check whether we can do anything
+ about it, warn otherwise (default)
+ source_address: Client-side IP address to bind to.
+ call_home: Boolean, true iff we are allowed to contact the
+ youtube-dlc servers for debugging.
+ sleep_interval: Number of seconds to sleep before each download when
+ used alone or a lower bound of a range for randomized
+ sleep before each download (minimum possible number
+ of seconds to sleep) when used along with
+ max_sleep_interval.
+ max_sleep_interval:Upper bound of a range for randomized sleep before each
+ download (maximum possible number of seconds to sleep).
+ Must only be used along with sleep_interval.
+ Actual sleep time will be a random float from range
+ [sleep_interval; max_sleep_interval].
+ listformats: Print an overview of available video formats and exit.
+ list_thumbnails: Print a table of all thumbnails and exit.
+ match_filter: A function that gets called with the info_dict of
+ every video.
+ If it returns a message, the video is ignored.
+ If it returns None, the video is downloaded.
+ match_filter_func in utils.py is one example of this.
+ no_color: Do not emit color codes in output.
+ geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
+ HTTP header
+ geo_bypass_country:
+ Two-letter ISO 3166-2 country code that will be used for
+ explicit geographic restriction bypassing via faking
+ X-Forwarded-For HTTP header
+ geo_bypass_ip_block:
+ IP range in CIDR notation that will be used similarly to
+ geo_bypass_country
+
+ The following options determine which downloader is picked:
+ external_downloader: Executable of the external downloader to call.
+ None or unset for standard (built-in) downloader.
+ hls_prefer_native: If True, use the native HLS downloader instead of
+ ffmpeg/avconv; if False, use ffmpeg/avconv; if None, use
+ the downloader suggested by the extractor.
+
+ The following parameters are not used by YoutubeDL itself, they are used by
+ the downloader (see youtube_dlc/downloader/common.py):
+ nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
+ noresizebuffer, retries, continuedl, noprogress, consoletitle,
+ xattr_set_filesize, external_downloader_args, hls_use_mpegts,
+ http_chunk_size.
+
+ The following options are used by the post processors:
+ prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
+ otherwise prefer ffmpeg.
+ ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
+ to the binary or its containing directory.
+ postprocessor_args: A list of additional command-line arguments for the
+ postprocessor.
+
+ The following options are used by the Youtube extractor:
+ youtube_include_dash_manifest: If True (default), DASH manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about DASH.
+ """
+
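A minimal usage sketch of the options documented above (values are illustrative; assuming YoutubeDL is importable from the package root, as in youtube-dl):

    from youtube_dlc import YoutubeDL

    def hook(d):
        if d['status'] == 'finished':
            print('done:', d['filename'])

    opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': '%(title)s-%(id)s.%(ext)s',
        'progress_hooks': [hook],
        'ignoreerrors': True,
    }
    with YoutubeDL(opts) as ydl:
        ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])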
+ _NUMERIC_FIELDS = set((
+ 'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
+ 'timestamp', 'upload_year', 'upload_month', 'upload_day',
+ 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
+ 'average_rating', 'comment_count', 'age_limit',
+ 'start_time', 'end_time',
+ 'chapter_number', 'season_number', 'episode_number',
+ 'track_number', 'disc_number', 'release_year',
+ 'playlist_index',
+ ))
+
+ params = None
+ _ies = []
+ _pps = []
+ _download_retcode = None
+ _num_downloads = None
+ _screen_file = None
+
+ def __init__(self, params=None, auto_init=True):
+ """Create a FileDownloader object with the given options."""
+ if params is None:
+ params = {}
+ self._ies = []
+ self._ies_instances = {}
+ self._pps = []
+ self._progress_hooks = []
+ self._download_retcode = 0
+ self._num_downloads = 0
+ self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
+ self._err_file = sys.stderr
+ self.params = {
+ # Default parameters
+ 'nocheckcertificate': False,
+ }
+ self.params.update(params)
+ self.cache = Cache(self)
+
+ def check_deprecated(param, option, suggestion):
+ if self.params.get(param) is not None:
+ self.report_warning(
+ '%s is deprecated. Use %s instead.' % (option, suggestion))
+ return True
+ return False
+
+ if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
+ if self.params.get('geo_verification_proxy') is None:
+ self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
+
+ check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N is the number of digits')
+ check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
+ check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
+
+ if params.get('bidi_workaround', False):
+ try:
+ import pty
+ master, slave = pty.openpty()
+ width = compat_get_terminal_size().columns
+ if width is None:
+ width_args = []
+ else:
+ width_args = ['-w', str(width)]
+ sp_kwargs = dict(
+ stdin=subprocess.PIPE,
+ stdout=slave,
+ stderr=self._err_file)
+ try:
+ self._output_process = subprocess.Popen(
+ ['bidiv'] + width_args, **sp_kwargs
+ )
+ except OSError:
+ self._output_process = subprocess.Popen(
+ ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
+ self._output_channel = os.fdopen(master, 'rb')
+ except OSError as ose:
+ if ose.errno == errno.ENOENT:
+ self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround. Make sure that fribidi is an executable file in one of the directories in your $PATH.')
+ else:
+ raise
+
+ if (sys.platform != 'win32'
+ and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
+ and not params.get('restrictfilenames', False)):
+ # Unicode filesystem API will throw errors (#1474, #13027)
+ self.report_warning(
+ 'Assuming --restrict-filenames since file system encoding '
+ 'cannot encode all characters. '
+ 'Set the LC_ALL environment variable to fix this.')
+ self.params['restrictfilenames'] = True
+
+ if isinstance(params.get('outtmpl'), bytes):
+ self.report_warning(
+ 'Parameter outtmpl is bytes, but should be a unicode string. '
+ 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+
+ self._setup_opener()
+
+ if auto_init:
+ self.print_debug_header()
+ self.add_default_info_extractors()
+
+ for pp_def_raw in self.params.get('postprocessors', []):
+ pp_class = get_postprocessor(pp_def_raw['key'])
+ pp_def = dict(pp_def_raw)
+ del pp_def['key']
+ pp = pp_class(self, **compat_kwargs(pp_def))
+ self.add_post_processor(pp)
+
+ for ph in self.params.get('progress_hooks', []):
+ self.add_progress_hook(ph)
+
+ register_socks_protocols()
+
+ def warn_if_short_id(self, argv):
+ # short YouTube ID starting with dash?
+ idxs = [
+ i for i, a in enumerate(argv)
+ if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
+ if idxs:
+ correct_argv = (
+ ['youtube-dlc']
+ + [a for i, a in enumerate(argv) if i not in idxs]
+ + ['--'] + [argv[i] for i in idxs]
+ )
+ self.report_warning(
+ 'Long argument string detected. '
+ 'Use -- to separate parameters and URLs, like this:\n%s\n' %
+ args_to_str(correct_argv))
+
+ def add_info_extractor(self, ie):
+ """Add an InfoExtractor object to the end of the list."""
+ self._ies.append(ie)
+ if not isinstance(ie, type):
+ self._ies_instances[ie.ie_key()] = ie
+ ie.set_downloader(self)
+
+ def get_info_extractor(self, ie_key):
+ """
+ Get an instance of an IE with name ie_key; it will try to get one from
+ the _ies list, and if there is no instance it will create a new one and
+ add it to the extractor list.
+ """
+ ie = self._ies_instances.get(ie_key)
+ if ie is None:
+ ie = get_info_extractor(ie_key)()
+ self.add_info_extractor(ie)
+ return ie
+
+ def add_default_info_extractors(self):
+ """
+ Add the InfoExtractors returned by gen_extractors to the end of the list
+ """
+ for ie in gen_extractor_classes():
+ self.add_info_extractor(ie)
+
+ def add_post_processor(self, pp):
+ """Add a PostProcessor object to the end of the chain."""
+ self._pps.append(pp)
+ pp.set_downloader(self)
+
+ def add_progress_hook(self, ph):
+ """Add the progress hook (currently only for the file downloader)"""
+ self._progress_hooks.append(ph)
+
+ def _bidi_workaround(self, message):
+ if not hasattr(self, '_output_channel'):
+ return message
+
+ assert hasattr(self, '_output_process')
+ assert isinstance(message, compat_str)
+ line_count = message.count('\n') + 1
+ self._output_process.stdin.write((message + '\n').encode('utf-8'))
+ self._output_process.stdin.flush()
+ res = ''.join(self._output_channel.readline().decode('utf-8')
+ for _ in range(line_count))
+ return res[:-len('\n')]
+
+ def to_screen(self, message, skip_eol=False):
+ """Print message to stdout if not in quiet mode."""
+ return self.to_stdout(message, skip_eol, check_quiet=True)
+
+ def _write_string(self, s, out=None):
+ write_string(s, out=out, encoding=self.params.get('encoding'))
+
+ def to_stdout(self, message, skip_eol=False, check_quiet=False):
+ """Print message to stdout if not in quiet mode."""
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
+ elif not check_quiet or not self.params.get('quiet', False):
+ message = self._bidi_workaround(message)
+ terminator = ['\n', ''][skip_eol]
+ output = message + terminator
+
+ self._write_string(output, self._screen_file)
+
+ def to_stderr(self, message):
+ """Print message to stderr."""
+ assert isinstance(message, compat_str)
+ if self.params.get('logger'):
+ self.params['logger'].error(message)
+ else:
+ message = self._bidi_workaround(message)
+ output = message + '\n'
+ self._write_string(output, self._err_file)
+
+ def to_console_title(self, message):
+ if not self.params.get('consoletitle', False):
+ return
+ if compat_os_name == 'nt':
+ if ctypes.windll.kernel32.GetConsoleWindow():
+ # c_wchar_p() might not be necessary if `message` is
+ # already of type unicode()
+ ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
+ elif 'TERM' in os.environ:
+ self._write_string('\033]0;%s\007' % message, self._screen_file)
+
+ def save_console_title(self):
+ if not self.params.get('consoletitle', False):
+ return
+ if self.params.get('simulate', False):
+ return
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
+ # Save the title on stack
+ self._write_string('\033[22;0t', self._screen_file)
+
+ def restore_console_title(self):
+ if not self.params.get('consoletitle', False):
+ return
+ if self.params.get('simulate', False):
+ return
+ if compat_os_name != 'nt' and 'TERM' in os.environ:
+ # Restore the title from stack
+ self._write_string('\033[23;0t', self._screen_file)
+
+ def __enter__(self):
+ self.save_console_title()
+ return self
+
+ def __exit__(self, *args):
+ self.restore_console_title()
+
+ if self.params.get('cookiefile') is not None:
+ self.cookiejar.save(ignore_discard=True, ignore_expires=True)
+
+ def trouble(self, message=None, tb=None):
+ """Determine action to take when a download problem appears.
+
+ Depending on if the downloader has been configured to ignore
+ download errors or not, this method may throw an exception or
+ not when errors are found, after printing the message.
+
+ tb, if given, is additional traceback information.
+ """
+ if message is not None:
+ self.to_stderr(message)
+ if self.params.get('verbose'):
+ if tb is None:
+ if sys.exc_info()[0]: # if .trouble has been called from an except block
+ tb = ''
+ if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
+ tb += encode_compat_str(traceback.format_exc())
+ else:
+ tb_data = traceback.format_list(traceback.extract_stack())
+ tb = ''.join(tb_data)
+ self.to_stderr(tb)
+ if not self.params.get('ignoreerrors', False):
+ if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+ exc_info = sys.exc_info()[1].exc_info
+ else:
+ exc_info = sys.exc_info()
+ raise DownloadError(message, exc_info)
+ self._download_retcode = 1
+
+ def report_warning(self, message):
+ '''
+ Print the message to stderr, it will be prefixed with 'WARNING:'
+ If stderr is a tty file the 'WARNING:' will be colored
+ '''
+ if self.params.get('logger') is not None:
+ self.params['logger'].warning(message)
+ else:
+ if self.params.get('no_warnings'):
+ return
+ if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
+ _msg_header = '\033[0;33mWARNING:\033[0m'
+ else:
+ _msg_header = 'WARNING:'
+ warning_message = '%s %s' % (_msg_header, message)
+ self.to_stderr(warning_message)
+
+ def report_error(self, message, tb=None):
+ '''
+ Do the same as trouble, but prefixes the message with 'ERROR:', colored
+ in red if stderr is a tty file.
+ '''
+ if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
+ _msg_header = '\033[0;31mERROR:\033[0m'
+ else:
+ _msg_header = 'ERROR:'
+ error_message = '%s %s' % (_msg_header, message)
+ self.trouble(error_message, tb)
+
+ def report_file_already_downloaded(self, file_name):
+ """Report file has already been fully downloaded."""
+ try:
+ self.to_screen('[download] %s has already been downloaded' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('[download] The file has already been downloaded')
+
+ def prepare_filename(self, info_dict):
+ """Generate the output filename."""
+ try:
+ template_dict = dict(info_dict)
+
+ template_dict['epoch'] = int(time.time())
+ autonumber_size = self.params.get('autonumber_size')
+ if autonumber_size is None:
+ autonumber_size = 5
+ template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+ if template_dict.get('resolution') is None:
+ if template_dict.get('width') and template_dict.get('height'):
+ template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
+ elif template_dict.get('height'):
+ template_dict['resolution'] = '%sp' % template_dict['height']
+ elif template_dict.get('width'):
+ template_dict['resolution'] = '%dx?' % template_dict['width']
+
+ sanitize = lambda k, v: sanitize_filename(
+ compat_str(v),
+ restricted=self.params.get('restrictfilenames'),
+ is_id=(k == 'id' or k.endswith('_id')))
+ template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
+ for k, v in template_dict.items()
+ if v is not None and not isinstance(v, (list, tuple, dict)))
+ template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+
+ outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+
+ # For fields playlist_index and autonumber convert all occurrences
+ # of %(field)s to %(field)0Nd for backward compatibility
+ field_size_compat_map = {
+ 'playlist_index': len(str(template_dict['n_entries'])),
+ 'autonumber': autonumber_size,
+ }
+ FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
+ mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
+ if mobj:
+ outtmpl = re.sub(
+ FIELD_SIZE_COMPAT_RE,
+ r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
+ outtmpl)
+
+ # Missing numeric fields used together with integer presentation types
+ # in format specification will break the argument substitution since
+ # string 'NA' is returned for missing fields. We will patch output
+ # template for missing fields to meet string presentation type.
+ for numeric_field in self._NUMERIC_FIELDS:
+ if numeric_field not in template_dict:
+ # As of [1] format syntax is:
+ # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
+ # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
+ FORMAT_RE = r'''(?x)
+ (?<!%)
+ %
+ \({0}\) # mapping key
+ (?:[#0\-+ ]+)? # conversion flags (optional)
+ (?:\d+)? # minimum field width (optional)
+ (?:\.\d+)? # precision (optional)
+ [hlL]? # length modifier (optional)
+ [diouxXeEfFgGcrs%] # conversion type
+ '''
+ outtmpl = re.sub(
+ FORMAT_RE.format(numeric_field),
+ r'%({0})s'.format(numeric_field), outtmpl)
+
+ # expand_path translates '%%' into '%' and '$$' into '$'
+ # correspondingly that is not what we want since we need to keep
+ # '%%' intact for template dict substitution step. Working around
+ # with boundary-alike separator hack.
+ sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
+ outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))
+
+ # outtmpl should be expand_path'ed before template dict substitution
+ # because meta fields may contain env variables we don't want to
+ # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
+ # title "Hello $PATH", we don't want `$PATH` to be expanded.
+ filename = expand_path(outtmpl).replace(sep, '') % template_dict
+
+ # Temporary fix for #4787
+ # 'Treat' all problem characters by passing filename through preferredencoding
+ # to workaround encoding issues with subprocess on python2 @ Windows
+ if sys.version_info < (3, 0) and sys.platform == 'win32':
+ filename = encodeFilename(filename, True).decode(preferredencoding())
+ return sanitize_path(filename)
+ except ValueError as err:
+ self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
+ return None
+
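The template mechanics reduce to named %-style substitution over the info dict, with 'NA' supplied for missing fields (the sanitization and %(autonumber)s width handling above are omitted in this sketch):

    import collections

    d = collections.defaultdict(lambda: 'NA',
                                {'title': 'clip', 'id': 'x1', 'ext': 'mp4'})
    assert '%(title)s-%(id)s.%(ext)s' % d == 'clip-x1.mp4'
    assert '%(uploader)s/%(title)s.%(ext)s' % d == 'NA/clip.mp4'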
+ def _match_entry(self, info_dict, incomplete):
+ """ Returns None iff the file should be downloaded """
+
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ date = info_dict.get('upload_date')
+ if date is not None:
+ dateRange = self.params.get('daterange', DateRange())
+ if date not in dateRange:
+ return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ view_count = info_dict.get('view_count')
+ if view_count is not None:
+ min_views = self.params.get('min_views')
+ if min_views is not None and view_count < min_views:
+ return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
+ max_views = self.params.get('max_views')
+ if max_views is not None and view_count > max_views:
+ return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
+ if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+ return 'Skipping "%s" because it is age restricted' % video_title
+ if self.in_download_archive(info_dict):
+ return '%s has already been recorded in archive' % video_title
+
+ if not incomplete:
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ ret = match_filter(info_dict)
+ if ret is not None:
+ return ret
+
+ return None
+
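A match_filter of the kind consumed here, per the options docstring: return None to accept the video, or a message explaining the rejection to skip it.

    def only_short_videos(info_dict):
        duration = info_dict.get('duration')
        if duration and duration > 600:
            return 'Skipping %s: longer than 10 minutes' % info_dict.get('id')
        return None  # accept: download the video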
+ @staticmethod
+ def add_extra_info(info_dict, extra_info):
+ '''Set the keys from extra_info in info dict if they are missing'''
+ for key, value in extra_info.items():
+ info_dict.setdefault(key, value)
+
+ def extract_info(self, url, download=True, ie_key=None, extra_info={},
+ process=True, force_generic_extractor=False):
+ '''
+ Returns a list with a dictionary for each video we find.
+ If 'download', also downloads the videos.
+ extra_info is a dict containing the extra values to add to each result
+ '''
+
+ if not ie_key and force_generic_extractor:
+ ie_key = 'Generic'
+
+ if ie_key:
+ ies = [self.get_info_extractor(ie_key)]
+ else:
+ ies = self._ies
+
+ for ie in ies:
+ if not ie.suitable(url):
+ continue
+
+ ie = self.get_info_extractor(ie.ie_key())
+ if not ie.working():
+ self.report_warning('The program functionality for this site has been marked as broken, '
+ 'and will probably not work.')
+
+ try:
+ ie_result = ie.extract(url)
+ if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ break
+ if isinstance(ie_result, list):
+ # Backwards compatibility: old IE result format
+ ie_result = {
+ '_type': 'compat_list',
+ 'entries': ie_result,
+ }
+ self.add_default_extra_info(ie_result, ie, url)
+ if process:
+ return self.process_ie_result(ie_result, download, extra_info)
+ else:
+ return ie_result
+ except GeoRestrictedError as e:
+ msg = e.msg
+ if e.countries:
+ msg += '\nThis video is available in %s.' % ', '.join(
+ map(ISO3166Utils.short2full, e.countries))
+ msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
+ self.report_error(msg)
+ break
+ except ExtractorError as e: # An error we somewhat expected
+ self.report_error(compat_str(e), e.format_traceback())
+ break
+ except MaxDownloadsReached:
+ raise
+ except Exception as e:
+ if self.params.get('ignoreerrors', False):
+ self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
+ break
+ else:
+ raise
+ else:
+ self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+ def add_default_extra_info(self, ie_result, ie, url):
+ self.add_extra_info(ie_result, {
+ 'extractor': ie.IE_NAME,
+ 'webpage_url': url,
+ 'webpage_url_basename': url_basename(url),
+ 'extractor_key': ie.ie_key(),
+ })
+
+ def process_ie_result(self, ie_result, download=True, extra_info={}):
+ """
+ Take the result of the ie (which may be modified) and resolve all unresolved
+ references (URLs, playlist items).
+
+ It will also download the videos if 'download'.
+ Returns the resolved ie_result.
+ """
+ result_type = ie_result.get('_type', 'video')
+
+ if result_type in ('url', 'url_transparent'):
+ ie_result['url'] = sanitize_url(ie_result['url'])
+ extract_flat = self.params.get('extract_flat', False)
+ if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
+ or extract_flat is True):
+ self.__forced_printings(
+ ie_result, self.prepare_filename(ie_result),
+ incomplete=True)
+ return ie_result
+
+ if result_type == 'video':
+ self.add_extra_info(ie_result, extra_info)
+ return self.process_video_result(ie_result, download=download)
+ elif result_type == 'url':
+ # We have to add extra_info to the results because it may be
+ # contained in a playlist
+ return self.extract_info(ie_result['url'],
+ download,
+ ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info)
+ elif result_type == 'url_transparent':
+ # Use the information from the embedding page
+ info = self.extract_info(
+ ie_result['url'], ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info, download=False, process=False)
+
+ # extract_info may return None when ignoreerrors is enabled and
+ # extraction failed with an error, don't crash and return early
+ # in this case
+ if not info:
+ return info
+
+ force_properties = dict(
+ (k, v) for k, v in ie_result.items() if v is not None)
+ for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
+ if f in force_properties:
+ del force_properties[f]
+ new_result = info.copy()
+ new_result.update(force_properties)
+
+ # Extracted info may not be a video result (i.e.
+ # info.get('_type', 'video') != video) but rather an url or
+ # url_transparent. In such cases outer metadata (from ie_result)
+ # should be propagated to inner one (info). For this to happen
+ # _type of info should be overridden with url_transparent. This
+ # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
+ if new_result.get('_type') == 'url':
+ new_result['_type'] = 'url_transparent'
+
+ return self.process_ie_result(
+ new_result, download=download, extra_info=extra_info)
+ elif result_type in ('playlist', 'multi_video'):
+ # We process each entry in the playlist
+ playlist = ie_result.get('title') or ie_result.get('id')
+ self.to_screen('[download] Downloading playlist: %s' % playlist)
+
+ playlist_results = []
+
+ playliststart = self.params.get('playliststart', 1) - 1
+ playlistend = self.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlistend == -1:
+ playlistend = None
+
+ playlistitems_str = self.params.get('playlist_items')
+ playlistitems = None
+ if playlistitems_str is not None:
+ def iter_playlistitems(format):
+ for string_segment in format.split(','):
+ if '-' in string_segment:
+ start, end = string_segment.split('-')
+ for item in range(int(start), int(end) + 1):
+ yield int(item)
+ else:
+ yield int(string_segment)
+ playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+
+ ie_entries = ie_result['entries']
+
+ def make_playlistitems_entries(list_ie_entries):
+ num_entries = len(list_ie_entries)
+ return [
+ list_ie_entries[i - 1] for i in playlistitems
+ if -num_entries <= i - 1 < num_entries]
+
+ def report_download(num_entries):
+ self.to_screen(
+ '[%s] playlist %s: Downloading %d videos' %
+ (ie_result['extractor'], playlist, num_entries))
+
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ if playlistitems:
+ entries = make_playlistitems_entries(ie_entries)
+ else:
+ entries = ie_entries[playliststart:playlistend]
+ n_entries = len(entries)
+ self.to_screen(
+ '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
+ (ie_result['extractor'], playlist, n_all_entries, n_entries))
+ elif isinstance(ie_entries, PagedList):
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
+ n_entries = len(entries)
+ report_download(n_entries)
+ else: # iterable
+ if playlistitems:
+ entries = make_playlistitems_entries(list(itertools.islice(
+ ie_entries, 0, max(playlistitems))))
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ report_download(n_entries)
+
+ if self.params.get('playlistreverse', False):
+ entries = entries[::-1]
+
+ if self.params.get('playlistrandom', False):
+ random.shuffle(entries)
+
+ x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
+ for i, entry in enumerate(entries, 1):
+ self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+ # This __x_forwarded_for_ip thing is a bit ugly but requires
+ # minimal changes
+ if x_forwarded_for:
+ entry['__x_forwarded_for_ip'] = x_forwarded_for
+ extra = {
+ 'n_entries': n_entries,
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+
+ reason = self._match_entry(entry, incomplete=True)
+ if reason is not None:
+ self.to_screen('[download] ' + reason)
+ continue
+
+ entry_result = self.process_ie_result(entry,
+ download=download,
+ extra_info=extra)
+ playlist_results.append(entry_result)
+ ie_result['entries'] = playlist_results
+ self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+ return ie_result
+ elif result_type == 'compat_list':
+ self.report_warning(
+ 'Extractor %s returned a compat_list result. '
+ 'It needs to be updated.' % ie_result.get('extractor'))
+
+ def _fixup(r):
+ self.add_extra_info(
+ r,
+ {
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+ )
+ return r
+ ie_result['entries'] = [
+ self.process_ie_result(_fixup(r), download, extra_info)
+ for r in ie_result['entries']
+ ]
+ return ie_result
+ else:
+ raise Exception('Invalid result type: %s' % result_type)
+
+ def _build_format_filter(self, filter_spec):
+ " Returns a function to filter the formats according to the filter_spec "
+
+ OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+ $
+ ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+ m = operator_rex.search(filter_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), filter_spec))
+ op = OPERATORS[m.group('op')]
+
+ if not m:
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '^=': lambda attr, value: attr.startswith(value),
+ '$=': lambda attr, value: attr.endswith(value),
+ '*=': lambda attr, value: value in attr,
+ }
+ str_operator_rex = re.compile(r'''(?x)
+ \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
+ \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<value>[a-zA-Z0-9._-]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.search(filter_spec)
+ if m:
+ comparison_value = m.group('value')
+ str_op = STR_OPERATORS[m.group('op')]
+ if m.group('negation'):
+ op = lambda attr, value: not str_op(attr, value)
+ else:
+ op = str_op
+
+ if not m:
+ raise ValueError('Invalid filter specification %r' % filter_spec)
+
+ def _filter(f):
+ actual_value = f.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+ return _filter
+
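Filter specs like these back the bracket syntax in -f expressions such as "best[height<=720]". A sketch against the internal helper (ydl being a YoutubeDL instance as above):

    f = ydl._build_format_filter('height<=720')  # numeric comparison
    g = ydl._build_format_filter('ext=mp4')      # string equality
    assert f({'height': 480}) and not f({'height': 1080})
    assert g({'ext': 'mp4'})
    # A trailing '?' (e.g. 'height<=720?') also accepts formats where the
    # field is unknown.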
+ def _default_format_spec(self, info_dict, download=True):
+
+ def can_merge():
+ merger = FFmpegMergerPP(self)
+ return merger.available and merger.can_merge()
+
+ def prefer_best():
+ if self.params.get('simulate', False):
+ return False
+ if not download:
+ return False
+ if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
+ return True
+ if info_dict.get('is_live'):
+ return True
+ if not can_merge():
+ return True
+ return False
+
+ req_format_list = ['bestvideo+bestaudio', 'best']
+ if prefer_best():
+ req_format_list.reverse()
+ return '/'.join(req_format_list)
+
+ def build_format_selector(self, format_spec):
+ def syntax_error(note, start):
+ message = (
+ 'Invalid format specification: '
+ '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
+ return SyntaxError(message)
+
+ PICKFIRST = 'PICKFIRST'
+ MERGE = 'MERGE'
+ SINGLE = 'SINGLE'
+ GROUP = 'GROUP'
+ FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+
+ def _parse_filter(tokens):
+ filter_parts = []
+ for type, string, start, _, _ in tokens:
+ if type == tokenize.OP and string == ']':
+ return ''.join(filter_parts)
+ else:
+ filter_parts.append(string)
+
+ def _remove_unused_ops(tokens):
+ # Remove operators that we don't use and join them with the surrounding strings
+ # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
+ ALLOWED_OPS = ('/', '+', ',', '(', ')')
+ last_string, last_start, last_end, last_line = None, None, None, None
+ for type, string, start, end, line in tokens:
+ if type == tokenize.OP and string == '[':
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ # everything inside brackets will be handled by _parse_filter
+ for type, string, start, end, line in tokens:
+ yield type, string, start, end, line
+ if type == tokenize.OP and string == ']':
+ break
+ elif type == tokenize.OP and string in ALLOWED_OPS:
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+ last_string = None
+ yield type, string, start, end, line
+ elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
+ if not last_string:
+ last_string = string
+ last_start = start
+ last_end = end
+ else:
+ last_string += string
+ if last_string:
+ yield tokenize.NAME, last_string, last_start, last_end, last_line
+
+ def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
+ selectors = []
+ current_selector = None
+ for type, string, start, _, _ in tokens:
+ # ENCODING is only defined in python 3.x
+ if type == getattr(tokenize, 'ENCODING', None):
+ continue
+ elif type in [tokenize.NAME, tokenize.NUMBER]:
+ current_selector = FormatSelector(SINGLE, string, [])
+ elif type == tokenize.OP:
+ if string == ')':
+ if not inside_group:
+ # ')' will be handled by the parentheses group
+ tokens.restore_last_token()
+ break
+ elif inside_merge and string in ['/', ',']:
+ tokens.restore_last_token()
+ break
+ elif inside_choice and string == ',':
+ tokens.restore_last_token()
+ break
+ elif string == ',':
+ if not current_selector:
+ raise syntax_error('"," must follow a format selector', start)
+ selectors.append(current_selector)
+ current_selector = None
+ elif string == '/':
+ if not current_selector:
+ raise syntax_error('"/" must follow a format selector', start)
+ first_choice = current_selector
+ second_choice = _parse_format_selection(tokens, inside_choice=True)
+ current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
+ elif string == '[':
+ if not current_selector:
+ current_selector = FormatSelector(SINGLE, 'best', [])
+ format_filter = _parse_filter(tokens)
+ current_selector.filters.append(format_filter)
+ elif string == '(':
+ if current_selector:
+ raise syntax_error('Unexpected "("', start)
+ group = _parse_format_selection(tokens, inside_group=True)
+ current_selector = FormatSelector(GROUP, group, [])
+ elif string == '+':
+ video_selector = current_selector
+ audio_selector = _parse_format_selection(tokens, inside_merge=True)
+ if not video_selector or not audio_selector:
+ raise syntax_error('"+" must be between two format selectors', start)
+ current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
+ else:
+ raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
+ elif type == tokenize.ENDMARKER:
+ break
+ if current_selector:
+ selectors.append(current_selector)
+ return selectors
+
+ def _build_selector_function(selector):
+ if isinstance(selector, list):
+ fs = [_build_selector_function(s) for s in selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ for format in f(ctx):
+ yield format
+ return selector_function
+ elif selector.type == GROUP:
+ selector_function = _build_selector_function(selector.selector)
+ elif selector.type == PICKFIRST:
+ fs = [_build_selector_function(s) for s in selector.selector]
+
+ def selector_function(ctx):
+ for f in fs:
+ picked_formats = list(f(ctx))
+ if picked_formats:
+ return picked_formats
+ return []
+ elif selector.type == SINGLE:
+ format_spec = selector.selector
+
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ if not formats:
+ return
+ if format_spec == 'all':
+ for f in formats:
+ yield f
+ elif format_spec in ['best', 'worst', None]:
+ format_idx = 0 if format_spec == 'worst' else -1
+ audiovideo_formats = [
+ f for f in formats
+ if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+ if audiovideo_formats:
+ yield audiovideo_formats[format_idx]
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)) we will fall back to best/worst
+ # {video,audio}-only format
+ elif ctx['incomplete_formats']:
+ yield formats[format_idx]
+ elif format_spec == 'bestaudio':
+ audio_formats = [
+ f for f in formats
+ if f.get('vcodec') == 'none']
+ if audio_formats:
+ yield audio_formats[-1]
+ elif format_spec == 'worstaudio':
+ audio_formats = [
+ f for f in formats
+ if f.get('vcodec') == 'none']
+ if audio_formats:
+ yield audio_formats[0]
+ elif format_spec == 'bestvideo':
+ video_formats = [
+ f for f in formats
+ if f.get('acodec') == 'none']
+ if video_formats:
+ yield video_formats[-1]
+ elif format_spec == 'worstvideo':
+ video_formats = [
+ f for f in formats
+ if f.get('acodec') == 'none']
+ if video_formats:
+ yield video_formats[0]
+ else:
+ extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
+ if format_spec in extensions:
+ filter_f = lambda f: f['ext'] == format_spec
+ else:
+ filter_f = lambda f: f['format_id'] == format_spec
+ matches = list(filter(filter_f, formats))
+ if matches:
+ yield matches[-1]
+ elif selector.type == MERGE:
+ def _merge(formats_info):
+ format_1, format_2 = [f['format_id'] for f in formats_info]
+ # The first format must contain the video and the
+ # second the audio
+ if formats_info[0].get('vcodec') == 'none':
+ self.report_error('The first format must '
+ 'contain the video, try using '
+ '"-f %s+%s"' % (format_2, format_1))
+ return
+ # Formats must be opposite (video+audio)
+ if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
+ self.report_error(
+ 'Both formats %s and %s are video-only, you must specify "-f video+audio"'
+ % (format_1, format_2))
+ return
+ output_ext = (
+ formats_info[0]['ext']
+ if self.params.get('merge_output_format') is None
+ else self.params['merge_output_format'])
+ return {
+ 'requested_formats': formats_info,
+ 'format': '%s+%s' % (formats_info[0].get('format'),
+ formats_info[1].get('format')),
+ 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
+ formats_info[1].get('format_id')),
+ 'width': formats_info[0].get('width'),
+ 'height': formats_info[0].get('height'),
+ 'resolution': formats_info[0].get('resolution'),
+ 'fps': formats_info[0].get('fps'),
+ 'vcodec': formats_info[0].get('vcodec'),
+ 'vbr': formats_info[0].get('vbr'),
+ 'stretched_ratio': formats_info[0].get('stretched_ratio'),
+ 'acodec': formats_info[1].get('acodec'),
+ 'abr': formats_info[1].get('abr'),
+ 'ext': output_ext,
+ }
+ video_selector, audio_selector = map(_build_selector_function, selector.selector)
+
+ def selector_function(ctx):
+ for pair in itertools.product(
+ video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
+ yield _merge(pair)
+
+ filters = [self._build_format_filter(f) for f in selector.filters]
+
+ def final_selector(ctx):
+ ctx_copy = copy.deepcopy(ctx)
+ for _filter in filters:
+ ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
+ return selector_function(ctx_copy)
+ return final_selector
+
+ stream = io.BytesIO(format_spec.encode('utf-8'))
+ try:
+ tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
+ except tokenize.TokenError:
+ raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
+
+ class TokenIterator(object):
+ def __init__(self, tokens):
+ self.tokens = tokens
+ self.counter = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ if self.counter >= len(self.tokens):
+ raise StopIteration()
+ value = self.tokens[self.counter]
+ self.counter += 1
+ return value
+
+ next = __next__
+
+ def restore_last_token(self):
+ self.counter -= 1
+
+ parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
+ return _build_selector_function(parsed_selector)
+
+ def _calc_headers(self, info_dict):
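+ """Build the HTTP headers for a format: std_headers overridden by the
+ format's own http_headers, plus Cookie and X-Forwarded-For when available."""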
+ res = std_headers.copy()
+
+ add_headers = info_dict.get('http_headers')
+ if add_headers:
+ res.update(add_headers)
+
+ cookies = self._calc_cookies(info_dict)
+ if cookies:
+ res['Cookie'] = cookies
+
+ if 'X-Forwarded-For' not in res:
+ x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
+ if x_forwarded_for_ip:
+ res['X-Forwarded-For'] = x_forwarded_for_ip
+
+ return res
+
+ def _calc_cookies(self, info_dict):
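+ """Return the Cookie header value the cookiejar would send for the
+ format's URL, or None if no cookies apply."""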
+ pr = sanitized_Request(info_dict['url'])
+ self.cookiejar.add_cookie_header(pr)
+ return pr.get_header('Cookie')
+
+ def process_video_result(self, info_dict, download=True):
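+ """Sanitize an extractor result, select the requested formats and,
+ when download is True, hand each selected format to process_info()."""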
+ assert info_dict.get('_type', 'video') == 'video'
+
+ if 'id' not in info_dict:
+ raise ExtractorError('Missing "id" field in extractor result')
+ if 'title' not in info_dict:
+ raise ExtractorError('Missing "title" field in extractor result')
+
+ def report_force_conversion(field, field_not, conversion):
+ self.report_warning(
+ '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
+ % (field, field_not, conversion))
+
+ def sanitize_string_field(info, string_field):
+ field = info.get(string_field)
+ if field is None or isinstance(field, compat_str):
+ return
+ report_force_conversion(string_field, 'a string', 'string')
+ info[string_field] = compat_str(field)
+
+ def sanitize_numeric_fields(info):
+ for numeric_field in self._NUMERIC_FIELDS:
+ field = info.get(numeric_field)
+ if field is None or isinstance(field, compat_numeric_types):
+ continue
+ report_force_conversion(numeric_field, 'numeric', 'int')
+ info[numeric_field] = int_or_none(field)
+
+ sanitize_string_field(info_dict, 'id')
+ sanitize_numeric_fields(info_dict)
+
+ if 'playlist' not in info_dict:
+ # It isn't part of a playlist
+ info_dict['playlist'] = None
+ info_dict['playlist_index'] = None
+
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
+ if thumbnails:
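+ # Sort thumbnails from worst to best (preference, then dimensions),
+ # so that thumbnails[-1] below picks the best available one.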
+ thumbnails.sort(key=lambda t: (
+ t.get('preference') if t.get('preference') is not None else -1,
+ t.get('width') if t.get('width') is not None else -1,
+ t.get('height') if t.get('height') is not None else -1,
+ t.get('id') if t.get('id') is not None else '', t.get('url')))
+ for i, t in enumerate(thumbnails):
+ t['url'] = sanitize_url(t['url'])
+ if t.get('width') and t.get('height'):
+ t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ if t.get('id') is None:
+ t['id'] = '%d' % i
+
+ if self.params.get('list_thumbnails'):
+ self.list_thumbnails(info_dict)
+ return
+
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnail'] = sanitize_url(thumbnail)
+ elif thumbnails:
+ info_dict['thumbnail'] = thumbnails[-1]['url']
+
+ if 'display_id' not in info_dict and 'id' in info_dict:
+ info_dict['display_id'] = info_dict['id']
+
+ if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ try:
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
+ info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+ except (ValueError, OverflowError, OSError):
+ pass
+
+ # Auto generate title fields corresponding to the *_number fields when missing
+ # in order to always have clean titles. This is very common for TV series.
+ for field in ('chapter', 'season', 'episode'):
+ if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
+ info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
+
+ for cc_kind in ('subtitles', 'automatic_captions'):
+ cc = info_dict.get(cc_kind)
+ if cc:
+ for _, subtitle in cc.items():
+ for subtitle_format in subtitle:
+ if subtitle_format.get('url'):
+ subtitle_format['url'] = sanitize_url(subtitle_format['url'])
+ if subtitle_format.get('ext') is None:
+ subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
+
+ automatic_captions = info_dict.get('automatic_captions')
+ subtitles = info_dict.get('subtitles')
+
+ if self.params.get('listsubtitles', False):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(
+ info_dict['id'], automatic_captions, 'automatic captions')
+ self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
+ return
+
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], subtitles, automatic_captions)
+
+ # We now pick which formats have to be downloaded
+ if info_dict.get('formats') is None:
+ # There's only one format available
+ formats = [info_dict]
+ else:
+ formats = info_dict['formats']
+
+ if not formats:
+ raise ExtractorError('No video formats found!')
+
+ def is_wellformed(f):
+ url = f.get('url')
+ if not url:
+ self.report_warning(
+ '"url" field is missing or empty - skipping format, '
+ 'there is an error in extractor')
+ return False
+ if isinstance(url, bytes):
+ sanitize_string_field(f, 'url')
+ return True
+
+ # Filter out malformed formats for better extraction robustness
+ formats = list(filter(is_wellformed, formats))
+
+ formats_dict = {}
+
+ # We check that all the formats have the format and format_id fields
+ for i, format in enumerate(formats):
+ sanitize_string_field(format, 'format_id')
+ sanitize_numeric_fields(format)
+ format['url'] = sanitize_url(format['url'])
+ if not format.get('format_id'):
+ format['format_id'] = compat_str(i)
+ else:
+ # Sanitize format_id from characters used in format selector expression
+ format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
+ format_id = format['format_id']
+ if format_id not in formats_dict:
+ formats_dict[format_id] = []
+ formats_dict[format_id].append(format)
+
+ # Make sure all formats have unique format_id
+ for format_id, ambiguous_formats in formats_dict.items():
+ if len(ambiguous_formats) > 1:
+ for i, format in enumerate(ambiguous_formats):
+ format['format_id'] = '%s-%d' % (format_id, i)
+
+ for i, format in enumerate(formats):
+ if format.get('format') is None:
+ format['format'] = '{id} - {res}{note}'.format(
+ id=format['format_id'],
+ res=self.format_resolution(format),
+ note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
+ )
+ # Automatically determine file extension if missing
+ if format.get('ext') is None:
+ format['ext'] = determine_ext(format['url']).lower()
+ # Automatically determine protocol if missing (useful for format
+ # selection purposes)
+ if format.get('protocol') is None:
+ format['protocol'] = determine_protocol(format)
+ # Add HTTP headers, so that external programs can use them from the
+ # json output
+ full_format_info = info_dict.copy()
+ full_format_info.update(format)
+ format['http_headers'] = self._calc_headers(full_format_info)
+ # Remove private housekeeping stuff
+ if '__x_forwarded_for_ip' in info_dict:
+ del info_dict['__x_forwarded_for_ip']
+
+ # TODO Central sorting goes here
+
+ if formats[0] is not info_dict:
+ # Only set the 'formats' field if the original info_dict lists them;
+ # otherwise we end up with a circular reference: the first (and only)
+ # element of the 'formats' field in info_dict would be info_dict itself,
+ # which can't be exported to JSON.
+ info_dict['formats'] = formats
+ if self.params.get('listformats'):
+ self.list_formats(info_dict)
+ return
+
+ req_format = self.params.get('format')
+ if req_format is None:
+ req_format = self._default_format_spec(info_dict, download=download)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Default format spec: %s' % req_format)
+
+ format_selector = self.build_format_selector(req_format)
+
+ # During format selection we may need access to the original format set
+ # in order to calculate some metrics or do some processing.
+ # For now we need to be able to guess whether the formats provided by the
+ # extractor are incomplete (i.e. whether the extractor provides only
+ # video-only or audio-only formats), so that format selection works
+ # correctly for extractors with such incomplete formats (see
+ # https://github.com/ytdl-org/youtube-dl/pull/5556).
+ # Since formats may be filtered during format selection and may no longer
+ # match the original set, the results could be incorrect. Thus the
+ # original formats, or metrics pre-calculated from them, should be passed
+ # to the format selection routines as well.
+ # We therefore pass a context object containing all the necessary
+ # additional data instead of just the formats.
+ # This fixes an incorrect format selection issue (see
+ # https://github.com/ytdl-org/youtube-dl/issues/10083).
+ incomplete_formats = (
+ # All formats are video-only or
+ all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
+ # all formats are audio-only
+ or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
+
+ ctx = {
+ 'formats': formats,
+ 'incomplete_formats': incomplete_formats,
+ }
+
+ formats_to_download = list(format_selector(ctx))
+ if not formats_to_download:
+ raise ExtractorError('requested format not available',
+ expected=True)
+
+ if download:
+ if len(formats_to_download) > 1:
+ self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
+ for format in formats_to_download:
+ new_info = dict(info_dict)
+ new_info.update(format)
+ self.process_info(new_info)
+ # We update the info dict with the best quality format (backwards compatibility)
+ info_dict.update(formats_to_download[-1])
+ return info_dict
+
+ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+ """Select the requested subtitles and their format"""
+ available_subs = {}
+ if normal_subtitles and self.params.get('writesubtitles'):
+ available_subs.update(normal_subtitles)
+ if automatic_captions and self.params.get('writeautomaticsub'):
+ for lang, cap_info in automatic_captions.items():
+ if lang not in available_subs:
+ available_subs[lang] = cap_info
+
+ if ((not self.params.get('writesubtitles')
+ and not self.params.get('writeautomaticsub'))
+ or not available_subs):
+ return None
+
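+ # Language selection: with allsubtitles take every available language;
+ # otherwise honour subtitleslangs, falling back to English and then to
+ # the first available language.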
+ if self.params.get('allsubtitles', False):
+ requested_langs = available_subs.keys()
+ else:
+ if self.params.get('subtitleslangs', False):
+ requested_langs = self.params.get('subtitleslangs')
+ elif 'en' in available_subs:
+ requested_langs = ['en']
+ else:
+ requested_langs = [list(available_subs.keys())[0]]
+
+ formats_query = self.params.get('subtitlesformat', 'best')
+ formats_preference = formats_query.split('/') if formats_query else []
+ subs = {}
+ for lang in requested_langs:
+ formats = available_subs.get(lang)
+ if formats is None:
+ self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ continue
+ for ext in formats_preference:
+ if ext == 'best':
+ f = formats[-1]
+ break
+ matches = list(filter(lambda f: f['ext'] == ext, formats))
+ if matches:
+ f = matches[-1]
+ break
+ else:
+ f = formats[-1]
+ self.report_warning(
+ 'No subtitle format found matching "%s" for language %s, '
+ 'using %s' % (formats_query, lang, f['ext']))
+ subs[lang] = f
+ return subs
+
+ def __forced_printings(self, info_dict, filename, incomplete):
+ def print_mandatory(field):
+ if (self.params.get('force%s' % field, False)
+ and (not incomplete or info_dict.get(field) is not None)):
+ self.to_stdout(info_dict[field])
+
+ def print_optional(field):
+ if (self.params.get('force%s' % field, False)
+ and info_dict.get(field) is not None):
+ self.to_stdout(info_dict[field])
+
+ print_mandatory('title')
+ print_mandatory('id')
+ if self.params.get('forceurl', False) and not incomplete:
+ if info_dict.get('requested_formats') is not None:
+ for f in info_dict['requested_formats']:
+ self.to_stdout(f['url'] + f.get('play_path', ''))
+ else:
+ # For RTMP URLs, also include the playpath
+ self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ print_optional('thumbnail')
+ print_optional('description')
+ if self.params.get('forcefilename', False) and filename is not None:
+ self.to_stdout(filename)
+ if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
+ self.to_stdout(formatSeconds(info_dict['duration']))
+ print_mandatory('format')
+ if self.params.get('forcejson', False):
+ self.to_stdout(json.dumps(info_dict))
+
+ def process_info(self, info_dict):
+ """Process a single resolved IE result."""
+
+ assert info_dict.get('_type', 'video') == 'video'
+
+ max_downloads = self.params.get('max_downloads')
+ if max_downloads is not None:
+ if self._num_downloads >= int(max_downloads):
+ raise MaxDownloadsReached()
+
+ # TODO: backward compatibility, to be removed
+ info_dict['fulltitle'] = info_dict['title']
+
+ if 'format' not in info_dict:
+ info_dict['format'] = info_dict['ext']
+
+ reason = self._match_entry(info_dict, incomplete=False)
+ if reason is not None:
+ self.to_screen('[download] ' + reason)
+ return
+
+ self._num_downloads += 1
+
+ info_dict['_filename'] = filename = self.prepare_filename(info_dict)
+
+ # Forced printings
+ self.__forced_printings(info_dict, filename, incomplete=False)
+
+ # Do nothing else if in simulate mode
+ if self.params.get('simulate', False):
+ return
+
+ if filename is None:
+ return
+
+ def ensure_dir_exists(path):
+ try:
+ dn = os.path.dirname(path)
+ if dn and not os.path.exists(dn):
+ os.makedirs(dn)
+ return True
+ except (OSError, IOError) as err:
+ self.report_error('unable to create directory ' + error_to_compat_str(err))
+ return False
+
+ if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
+ return
+
+ if self.params.get('writedescription', False):
+ descfn = replace_extension(filename, 'description', info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+ self.to_screen('[info] Video description is already present')
+ elif info_dict.get('description') is None:
+ self.report_warning('There\'s no description to write.')
+ else:
+ try:
+ self.to_screen('[info] Writing video description to: ' + descfn)
+ with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ descfile.write(info_dict['description'])
+ except (OSError, IOError):
+ self.report_error('Cannot write description file ' + descfn)
+ return
+
+ if self.params.get('writeannotations', False):
+ annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+ self.to_screen('[info] Video annotations are already present')
+ elif not info_dict.get('annotations'):
+ self.report_warning('There are no annotations to write.')
+ else:
+ try:
+ self.to_screen('[info] Writing video annotations to: ' + annofn)
+ with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
+ annofile.write(info_dict['annotations'])
+ except (KeyError, TypeError):
+ self.report_warning('There are no annotations to write.')
+ except (OSError, IOError):
+ self.report_error('Cannot write annotations file: ' + annofn)
+ return
+
+ def dl(name, info):
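+ # Pick a suitable downloader (native HTTP/HLS or an external one) for
+ # this format, attach the progress hooks and start the download.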
+ fd = get_suitable_downloader(info, self.params)(self, self.params)
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ if self.params.get('verbose'):
+ self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
+ return fd.download(name, info)
+
+ subtitles_are_requested = any([self.params.get('writesubtitles', False),
+ self.params.get('writeautomaticsub')])
+
+ if subtitles_are_requested and info_dict.get('requested_subtitles'):
+ # Subtitle download errors are already handled as troubles in the
+ # relevant IE, so extraction silently continues for IEs that don't
+ # support subtitles.
+ subtitles = info_dict['requested_subtitles']
+ for sub_lang, sub_info in subtitles.items():
+ sub_format = sub_info['ext']
+ sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+ self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+ else:
+ if sub_info.get('data') is not None:
+ try:
+ # Use newline='' to prevent conversion of newline characters
+ # See https://github.com/ytdl-org/youtube-dl/issues/10268
+ with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
+ subfile.write(sub_info['data'])
+ except (OSError, IOError):
+ self.report_error('Cannot write subtitles file ' + sub_filename)
+ return
+ else:
+ try:
+ dl(sub_filename, sub_info)
+ except (ExtractorError, IOError, OSError, ValueError,
+ compat_urllib_error.URLError,
+ compat_http_client.HTTPException,
+ socket.error) as err:
+ self.report_warning('Unable to download subtitle for "%s": %s' %
+ (sub_lang, error_to_compat_str(err)))
+ continue
+
+ if self.params.get('writeinfojson', False):
+ infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
+ self.to_screen('[info] Video description metadata is already present')
+ else:
+ self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
+ try:
+ write_json_file(self.filter_requested_info(info_dict), infofn)
+ except (OSError, IOError):
+ self.report_error('Cannot write metadata to JSON file ' + infofn)
+ return
+
+ self._write_thumbnails(info_dict, filename)
+
+ if not self.params.get('skip_download', False):
+ try:
+ if info_dict.get('requested_formats') is not None:
+ downloaded = []
+ success = True
+ merger = FFmpegMergerPP(self)
+ if not merger.available:
+ postprocessors = []
+ self.report_warning('You have requested multiple '
+ 'formats but ffmpeg or avconv are not installed.'
+ ' The formats won\'t be merged.')
+ else:
+ postprocessors = [merger]
+
+ def compatible_formats(formats):
+ video, audio = formats
+ # Check extension
+ video_ext, audio_ext = video.get('ext'), audio.get('ext')
+ if video_ext and audio_ext:
+ COMPATIBLE_EXTS = (
+ ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
+ ('webm',), # one-element tuple; a bare ('webm') is just a string
+ )
+ for exts in COMPATIBLE_EXTS:
+ if video_ext in exts and audio_ext in exts:
+ return True
+ # TODO: Check acodec/vcodec
+ return False
+
+ filename_real_ext = os.path.splitext(filename)[1][1:]
+ filename_wo_ext = (
+ os.path.splitext(filename)[0]
+ if filename_real_ext == info_dict['ext']
+ else filename)
+ requested_formats = info_dict['requested_formats']
+ if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
+ info_dict['ext'] = 'mkv'
+ self.report_warning(
+ 'Requested formats are incompatible for merge and will be merged into mkv.')
+ # Ensure filename always has a correct extension for successful merge
+ filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
+ if os.path.exists(encodeFilename(filename)):
+ self.to_screen(
+ '[download] %s has already been downloaded and '
+ 'merged' % filename)
+ else:
+ for f in requested_formats:
+ new_info = dict(info_dict)
+ new_info.update(f)
+ fname = prepend_extension(
+ self.prepare_filename(new_info),
+ 'f%s' % f['format_id'], new_info['ext'])
+ if not ensure_dir_exists(fname):
+ return
+ downloaded.append(fname)
+ partial_success = dl(fname, new_info)
+ success = success and partial_success
+ info_dict['__postprocessors'] = postprocessors
+ info_dict['__files_to_merge'] = downloaded
+ else:
+ # Just a single file
+ success = dl(filename, info_dict)
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_error('unable to download video data: %s' % error_to_compat_str(err))
+ return
+ except (OSError, IOError) as err:
+ raise UnavailableVideoError(err)
+ except (ContentTooShortError, ) as err:
+ self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+ return
+
+ if success and filename != '-':
+ # Fixup content
+ fixup_policy = self.params.get('fixup')
+ if fixup_policy is None:
+ fixup_policy = 'detect_or_warn'
+
+ INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+
+ stretched_ratio = info_dict.get('stretched_ratio')
+ if stretched_ratio is not None and stretched_ratio != 1:
+ if fixup_policy == 'warn':
+ self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
+ info_dict['id'], stretched_ratio))
+ elif fixup_policy == 'detect_or_warn':
+ stretched_pp = FFmpegFixupStretchedPP(self)
+ if stretched_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(stretched_pp)
+ else:
+ self.report_warning(
+ '%s: Non-uniform pixel ratio (%s). %s'
+ % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ if (info_dict.get('requested_formats') is None
+ and info_dict.get('container') == 'm4a_dash'):
+ if fixup_policy == 'warn':
+ self.report_warning(
+ '%s: writing DASH m4a. '
+ 'Only some players support this container.'
+ % info_dict['id'])
+ elif fixup_policy == 'detect_or_warn':
+ fixup_pp = FFmpegFixupM4aPP(self)
+ if fixup_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(fixup_pp)
+ else:
+ self.report_warning(
+ '%s: writing DASH m4a. '
+ 'Only some players support this container. %s'
+ % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ if (info_dict.get('protocol') == 'm3u8_native'
+ or (info_dict.get('protocol') == 'm3u8'
+ and self.params.get('hls_prefer_native'))):
+ if fixup_policy == 'warn':
+ self.report_warning('%s: malformed AAC bitstream detected.' % (
+ info_dict['id']))
+ elif fixup_policy == 'detect_or_warn':
+ fixup_pp = FFmpegFixupM3u8PP(self)
+ if fixup_pp.available:
+ info_dict.setdefault('__postprocessors', [])
+ info_dict['__postprocessors'].append(fixup_pp)
+ else:
+ self.report_warning(
+ '%s: malformed AAC bitstream detected. %s'
+ % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
+ else:
+ assert fixup_policy in ('ignore', 'never')
+
+ try:
+ self.post_process(filename, info_dict)
+ except (PostProcessingError) as err:
+ self.report_error('postprocessing: %s' % str(err))
+ return
+ self.record_download_archive(info_dict)
+
+ def download(self, url_list):
+ """Download a given list of URLs."""
+ outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ if (len(url_list) > 1
+ and outtmpl != '-'
+ and '%' not in outtmpl
+ and self.params.get('max_downloads') != 1):
+ raise SameFileError(outtmpl)
+
+ for url in url_list:
+ try:
+ # It also downloads the videos
+ res = self.extract_info(
+ url, force_generic_extractor=self.params.get('force_generic_extractor', False))
+ except UnavailableVideoError:
+ self.report_error('unable to download video')
+ except MaxDownloadsReached:
+ self.to_screen('[info] Maximum number of downloaded files reached.')
+ raise
+ else:
+ if self.params.get('dump_single_json', False):
+ self.to_stdout(json.dumps(res))
+
+ return self._download_retcode
+
+ def download_with_info_file(self, info_filename):
+ with contextlib.closing(fileinput.FileInput(
+ [info_filename], mode='r',
+ openhook=fileinput.hook_encoded('utf-8'))) as f:
+ # FileInput doesn't have a read method, so we can't call json.load directly
+ info = self.filter_requested_info(json.loads('\n'.join(f)))
+ try:
+ self.process_ie_result(info, download=True)
+ except DownloadError:
+ webpage_url = info.get('webpage_url')
+ if webpage_url is not None:
+ self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
+ return self.download([webpage_url])
+ else:
+ raise
+ return self._download_retcode
+
+ @staticmethod
+ def filter_requested_info(info_dict):
+ return dict(
+ (k, v) for k, v in info_dict.items()
+ if k not in ['requested_formats', 'requested_subtitles'])
+
+ def post_process(self, filename, ie_info):
+ """Run all the postprocessors on the given file."""
+ info = dict(ie_info)
+ info['filepath'] = filename
+ pps_chain = []
+ if ie_info.get('__postprocessors') is not None:
+ pps_chain.extend(ie_info['__postprocessors'])
+ pps_chain.extend(self._pps)
+ for pp in pps_chain:
+ files_to_delete = []
+ try:
+ files_to_delete, info = pp.run(info)
+ except PostProcessingError as e:
+ self.report_error(e.msg)
+ if files_to_delete and not self.params.get('keepvideo', False):
+ for old_filename in files_to_delete:
+ self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
+ try:
+ os.remove(encodeFilename(old_filename))
+ except (IOError, OSError):
+ self.report_warning('Unable to remove downloaded original file')
+
+ def _make_archive_id(self, info_dict):
+ video_id = info_dict.get('id')
+ if not video_id:
+ return
+ # Future-proof against any change in case
+ # and backwards compatibility with prior versions
+ extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
+ if extractor is None:
+ url = str_or_none(info_dict.get('url'))
+ if not url:
+ return
+ # Try to find matching extractor for the URL and take its ie_key
+ for ie in self._ies:
+ if ie.suitable(url):
+ extractor = ie.ie_key()
+ break
+ else:
+ return
+ return extractor.lower() + ' ' + video_id
+
+ def in_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return False
+
+ vid_id = self._make_archive_id(info_dict)
+ if not vid_id:
+ return False # Incomplete video information
+
+ try:
+ with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+ for line in archive_file:
+ if line.strip() == vid_id:
+ return True
+ except IOError as ioe:
+ if ioe.errno != errno.ENOENT:
+ raise
+ return False
+
+ def record_download_archive(self, info_dict):
+ fn = self.params.get('download_archive')
+ if fn is None:
+ return
+ vid_id = self._make_archive_id(info_dict)
+ assert vid_id
+ with locked_file(fn, 'a', encoding='utf-8') as archive_file:
+ archive_file.write(vid_id + '\n')
+
+ @staticmethod
+ def format_resolution(format, default='unknown'):
+ if format.get('vcodec') == 'none':
+ return 'audio only'
+ if format.get('resolution') is not None:
+ return format['resolution']
+ if format.get('height') is not None:
+ if format.get('width') is not None:
+ res = '%sx%s' % (format['width'], format['height'])
+ else:
+ res = '%sp' % format['height']
+ elif format.get('width') is not None:
+ res = '%dx?' % format['width']
+ else:
+ res = default
+ return res
+
+ def _format_note(self, fdict):
+ res = ''
+ if fdict.get('ext') in ['f4f', 'f4m']:
+ res += '(unsupported) '
+ if fdict.get('language'):
+ if res:
+ res += ' '
+ res += '[%s] ' % fdict['language']
+ if fdict.get('format_note') is not None:
+ res += fdict['format_note'] + ' '
+ if fdict.get('tbr') is not None:
+ res += '%4dk ' % fdict['tbr']
+ if fdict.get('container') is not None:
+ if res:
+ res += ', '
+ res += '%s container' % fdict['container']
+ if (fdict.get('vcodec') is not None
+ and fdict.get('vcodec') != 'none'):
+ if res:
+ res += ', '
+ res += fdict['vcodec']
+ if fdict.get('vbr') is not None:
+ res += '@'
+ elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
+ res += 'video@'
+ if fdict.get('vbr') is not None:
+ res += '%4dk' % fdict['vbr']
+ if fdict.get('fps') is not None:
+ if res:
+ res += ', '
+ res += '%sfps' % fdict['fps']
+ if fdict.get('acodec') is not None:
+ if res:
+ res += ', '
+ if fdict['acodec'] == 'none':
+ res += 'video only'
+ else:
+ res += '%-5s' % fdict['acodec']
+ elif fdict.get('abr') is not None:
+ if res:
+ res += ', '
+ res += 'audio'
+ if fdict.get('abr') is not None:
+ res += '@%3dk' % fdict['abr']
+ if fdict.get('asr') is not None:
+ res += ' (%5dHz)' % fdict['asr']
+ if fdict.get('filesize') is not None:
+ if res:
+ res += ', '
+ res += format_bytes(fdict['filesize'])
+ elif fdict.get('filesize_approx') is not None:
+ if res:
+ res += ', '
+ res += '~' + format_bytes(fdict['filesize_approx'])
+ return res
+
+ def list_formats(self, info_dict):
+ formats = info_dict.get('formats', [info_dict])
+ table = [
+ [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
+ for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
+ if len(formats) > 1:
+ table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
+
+ header_line = ['format code', 'extension', 'resolution', 'note']
+ self.to_screen(
+ '[info] Available formats for %s:\n%s' %
+ (info_dict['id'], render_table(header_line, table)))
+
+ def list_thumbnails(self, info_dict):
+ thumbnails = info_dict.get('thumbnails')
+ if not thumbnails:
+ self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
+ return
+
+ self.to_screen(
+ '[info] Thumbnails for %s:' % info_dict['id'])
+ self.to_screen(render_table(
+ ['ID', 'width', 'height', 'URL'],
+ [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
+
+ def list_subtitles(self, video_id, subtitles, name='subtitles'):
+ if not subtitles:
+ self.to_screen('%s has no %s' % (video_id, name))
+ return
+ self.to_screen(
+ 'Available %s for %s:' % (name, video_id))
+ self.to_screen(render_table(
+ ['Language', 'formats'],
+ [[lang, ', '.join(f['ext'] for f in reversed(formats))]
+ for lang, formats in subtitles.items()]))
+
+ def urlopen(self, req):
+ """ Start an HTTP download """
+ if isinstance(req, compat_basestring):
+ req = sanitized_Request(req)
+ return self._opener.open(req, timeout=self._socket_timeout)
+
+ def print_debug_header(self):
+ if not self.params.get('verbose'):
+ return
+
+ if type('') is not compat_str:
+ # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
+ self.report_warning(
+ 'Your Python is broken! Update to a newer and supported version')
+
+ stdout_encoding = getattr(
+ sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
+ encoding_str = (
+ '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
+ locale.getpreferredencoding(),
+ sys.getfilesystemencoding(),
+ stdout_encoding,
+ self.get_encoding()))
+ write_string(encoding_str, encoding=None)
+
+ self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
+ if _LAZY_LOADER:
+ self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+ try:
+ sp = subprocess.Popen(
+ ['git', 'rev-parse', '--short', 'HEAD'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ cwd=os.path.dirname(os.path.abspath(__file__)))
+ out, err = sp.communicate()
+ out = out.decode().strip()
+ if re.match('[0-9a-f]+', out):
+ self._write_string('[debug] Git HEAD: ' + out + '\n')
+ except Exception:
+ try:
+ sys.exc_clear()
+ except Exception:
+ pass
+
+ def python_implementation():
+ impl_name = platform.python_implementation()
+ if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
+ return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
+ return impl_name
+
+ self._write_string('[debug] Python version %s (%s) - %s\n' % (
+ platform.python_version(), python_implementation(),
+ platform_name()))
+
+ exe_versions = FFmpegPostProcessor.get_versions(self)
+ exe_versions['rtmpdump'] = rtmpdump_version()
+ exe_versions['phantomjs'] = PhantomJSwrapper._version()
+ exe_str = ', '.join(
+ '%s %s' % (exe, v)
+ for exe, v in sorted(exe_versions.items())
+ if v
+ )
+ if not exe_str:
+ exe_str = 'none'
+ self._write_string('[debug] exe versions: %s\n' % exe_str)
+
+ proxy_map = {}
+ for handler in self._opener.handlers:
+ if hasattr(handler, 'proxies'):
+ proxy_map.update(handler.proxies)
+ self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
+
+ if self.params.get('call_home', False):
+ ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
+ self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+ latest_version = self.urlopen(
+ 'https://yt-dl.org/latest/version').read().decode('utf-8')
+ if version_tuple(latest_version) > version_tuple(__version__):
+ self.report_warning(
+ 'You are using an outdated version (newest version: %s)! '
+ 'See https://yt-dl.org/update if you need help updating.' %
+ latest_version)
+
+ def _setup_opener(self):
+ timeout_val = self.params.get('socket_timeout')
+ self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
+
+ opts_cookiefile = self.params.get('cookiefile')
+ opts_proxy = self.params.get('proxy')
+
+ if opts_cookiefile is None:
+ self.cookiejar = compat_cookiejar.CookieJar()
+ else:
+ opts_cookiefile = expand_path(opts_cookiefile)
+ self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
+ if os.access(opts_cookiefile, os.R_OK):
+ self.cookiejar.load(ignore_discard=True, ignore_expires=True)
+
+ cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
+ if opts_proxy is not None:
+ if opts_proxy == '':
+ proxies = {}
+ else:
+ proxies = {'http': opts_proxy, 'https': opts_proxy}
+ else:
+ proxies = compat_urllib_request.getproxies()
+ # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
+ if 'http' in proxies and 'https' not in proxies:
+ proxies['https'] = proxies['http']
+ proxy_handler = PerRequestProxyHandler(proxies)
+
+ debuglevel = 1 if self.params.get('debug_printtraffic') else 0
+ https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
+ ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
+ redirect_handler = YoutubeDLRedirectHandler()
+ data_handler = compat_urllib_request_DataHandler()
+
+ # When passing our own FileHandler instance, build_opener won't add the
+ # default FileHandler and allows us to disable the file protocol, which
+ # can be used for malicious purposes (see
+ # https://github.com/ytdl-org/youtube-dl/issues/8227)
+ file_handler = compat_urllib_request.FileHandler()
+
+ def file_open(*args, **kwargs):
+ raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dlc for security reasons')
+ file_handler.file_open = file_open
+
+ opener = compat_urllib_request.build_opener(
+ proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)
+
+ # Delete the default user-agent header, which would otherwise apply in
+ # cases where our custom HTTP handler doesn't come into play
+ # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
+ opener.addheaders = []
+ self._opener = opener
+
+ def encode(self, s):
+ if isinstance(s, bytes):
+ return s # Already encoded
+
+ try:
+ return s.encode(self.get_encoding())
+ except UnicodeEncodeError as err:
+ err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
+ raise
+
+ def get_encoding(self):
+ encoding = self.params.get('encoding')
+ if encoding is None:
+ encoding = preferredencoding()
+ return encoding
+
+ def _write_thumbnails(self, info_dict, filename):
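+ """Write the best thumbnail (or every thumbnail with
+ write_all_thumbnails) next to the output file."""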
+ if self.params.get('writethumbnail', False):
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails:
+ thumbnails = [thumbnails[-1]]
+ elif self.params.get('write_all_thumbnails', False):
+ thumbnails = info_dict.get('thumbnails')
+ else:
+ return
+
+ if not thumbnails:
+ # No thumbnails present, so return immediately
+ return
+
+ for t in thumbnails:
+ thumb_ext = determine_ext(t['url'], 'jpg')
+ suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
+ thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
+ t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+
+ if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+ self.to_screen('[%s] %s: Thumbnail %sis already present' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ else:
+ self.to_screen('[%s] %s: Downloading thumbnail %s...' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id))
+ try:
+ uf = self.urlopen(t['url'])
+ with open(encodeFilename(thumb_filename), 'wb') as thumbf:
+ shutil.copyfileobj(uf, thumbf)
+ self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
+ (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self.report_warning('Unable to download thumbnail "%s": %s' %
+ (t['url'], error_to_compat_str(err)))
diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py
new file mode 100644
index 000000000..a663417da
--- /dev/null
+++ b/youtube_dlc/__init__.py
@@ -0,0 +1,483 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+__license__ = 'Public Domain'
+
+import codecs
+import io
+import os
+import random
+import sys
+
+
+from .options import (
+ parseOpts,
+)
+from .compat import (
+ compat_getpass,
+ compat_shlex_split,
+ workaround_optparse_bug9161,
+)
+from .utils import (
+ DateRange,
+ decodeOption,
+ DEFAULT_OUTTMPL,
+ DownloadError,
+ expand_path,
+ match_filter_func,
+ MaxDownloadsReached,
+ preferredencoding,
+ read_batch_urls,
+ SameFileError,
+ setproctitle,
+ std_headers,
+ write_string,
+ render_table,
+)
+from .update import update_self
+from .downloader import (
+ FileDownloader,
+)
+from .extractor import gen_extractors, list_extractors
+from .extractor.adobepass import MSO_INFO
+from .YoutubeDL import YoutubeDL
+
+
+def _real_main(argv=None):
+ # Compatibility fixes for Windows
+ if sys.platform == 'win32':
+ # https://github.com/ytdl-org/youtube-dl/issues/820
+ codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
+
+ workaround_optparse_bug9161()
+
+ setproctitle('youtube-dlc')
+
+ parser, opts, args = parseOpts(argv)
+
+ # Set user agent
+ if opts.user_agent is not None:
+ std_headers['User-Agent'] = opts.user_agent
+
+ # Set referer
+ if opts.referer is not None:
+ std_headers['Referer'] = opts.referer
+
+ # Custom HTTP headers
+ if opts.headers is not None:
+ for h in opts.headers:
+ if ':' not in h:
+ parser.error('wrong header formatting, it should be key:value, not "%s"' % h)
+ key, value = h.split(':', 1)
+ if opts.verbose:
+ write_string('[debug] Adding header from command line option %s:%s\n' % (key, value))
+ std_headers[key] = value
+
+ # Dump user agent
+ if opts.dump_user_agent:
+ write_string(std_headers['User-Agent'] + '\n', out=sys.stdout)
+ sys.exit(0)
+
+ # Batch file verification
+ batch_urls = []
+ if opts.batchfile is not None:
+ try:
+ if opts.batchfile == '-':
+ batchfd = sys.stdin
+ else:
+ batchfd = io.open(
+ expand_path(opts.batchfile),
+ 'r', encoding='utf-8', errors='ignore')
+ batch_urls = read_batch_urls(batchfd)
+ if opts.verbose:
+ write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
+ except IOError:
+ sys.exit('ERROR: batch file %s could not be read' % opts.batchfile)
+ all_urls = batch_urls + [url.strip() for url in args] # batch_urls are already stripped in read_batch_urls
+ _enc = preferredencoding()
+ all_urls = [url.decode(_enc, 'ignore') if isinstance(url, bytes) else url for url in all_urls]
+
+ if opts.list_extractors:
+ for ie in list_extractors(opts.age_limit):
+ write_string(ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie._WORKING else '') + '\n', out=sys.stdout)
+ matchedUrls = [url for url in all_urls if ie.suitable(url)]
+ for mu in matchedUrls:
+ write_string(' ' + mu + '\n', out=sys.stdout)
+ sys.exit(0)
+ if opts.list_extractor_descriptions:
+ for ie in list_extractors(opts.age_limit):
+ if not ie._WORKING:
+ continue
+ desc = getattr(ie, 'IE_DESC', ie.IE_NAME)
+ if desc is False:
+ continue
+ if hasattr(ie, 'SEARCH_KEY'):
+ _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle', 'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
+ _COUNTS = ('', '5', '10', 'all')
+ desc += ' (Example: "%s%s:%s" )' % (ie.SEARCH_KEY, random.choice(_COUNTS), random.choice(_SEARCHES))
+ write_string(desc + '\n', out=sys.stdout)
+ sys.exit(0)
+ if opts.ap_list_mso:
+ table = [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]
+ write_string('Supported TV Providers:\n' + render_table(['mso', 'mso name'], table) + '\n', out=sys.stdout)
+ sys.exit(0)
+
+ # Conflicting, missing and erroneous options
+ if opts.usenetrc and (opts.username is not None or opts.password is not None):
+ parser.error('using .netrc conflicts with giving username/password')
+ if opts.password is not None and opts.username is None:
+ parser.error('account username missing\n')
+ if opts.ap_password is not None and opts.ap_username is None:
+ parser.error('TV Provider account username missing\n')
+ if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid):
+ parser.error('using output template conflicts with using title, video ID or auto number')
+ if opts.autonumber_size is not None:
+ if opts.autonumber_size <= 0:
+ parser.error('auto number size must be positive')
+ if opts.autonumber_start is not None:
+ if opts.autonumber_start < 0:
+ parser.error('auto number start must be positive or 0')
+ if opts.usetitle and opts.useid:
+ parser.error('using title conflicts with using video ID')
+ if opts.username is not None and opts.password is None:
+ opts.password = compat_getpass('Type account password and press [Return]: ')
+ if opts.ap_username is not None and opts.ap_password is None:
+ opts.ap_password = compat_getpass('Type TV provider account password and press [Return]: ')
+ if opts.ratelimit is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
+ if numeric_limit is None:
+ parser.error('invalid rate limit specified')
+ opts.ratelimit = numeric_limit
+ if opts.min_filesize is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.min_filesize)
+ if numeric_limit is None:
+ parser.error('invalid min_filesize specified')
+ opts.min_filesize = numeric_limit
+ if opts.max_filesize is not None:
+ numeric_limit = FileDownloader.parse_bytes(opts.max_filesize)
+ if numeric_limit is None:
+ parser.error('invalid max_filesize specified')
+ opts.max_filesize = numeric_limit
+ if opts.sleep_interval is not None:
+ if opts.sleep_interval < 0:
+ parser.error('sleep interval must be positive or 0')
+ if opts.max_sleep_interval is not None:
+ if opts.max_sleep_interval < 0:
+ parser.error('max sleep interval must be positive or 0')
+ if opts.sleep_interval is None:
+ parser.error('min sleep interval must be specified, use --min-sleep-interval')
+ if opts.max_sleep_interval < opts.sleep_interval:
+ parser.error('max sleep interval must be greater than or equal to min sleep interval')
+ else:
+ opts.max_sleep_interval = opts.sleep_interval
+ if opts.ap_mso and opts.ap_mso not in MSO_INFO:
+ parser.error('Unsupported TV Provider, use --ap-list-mso to get a list of supported TV Providers')
+
+ def parse_retries(retries):
+ if retries in ('inf', 'infinite'):
+ parsed_retries = float('inf')
+ else:
+ try:
+ parsed_retries = int(retries)
+ except (TypeError, ValueError):
+ parser.error('invalid retry count specified')
+ return parsed_retries
+ if opts.retries is not None:
+ opts.retries = parse_retries(opts.retries)
+ if opts.fragment_retries is not None:
+ opts.fragment_retries = parse_retries(opts.fragment_retries)
+ if opts.buffersize is not None:
+ numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize)
+ if numeric_buffersize is None:
+ parser.error('invalid buffer size specified')
+ opts.buffersize = numeric_buffersize
+ if opts.http_chunk_size is not None:
+ numeric_chunksize = FileDownloader.parse_bytes(opts.http_chunk_size)
+ if not numeric_chunksize:
+ parser.error('invalid http chunk size specified')
+ opts.http_chunk_size = numeric_chunksize
+ if opts.playliststart <= 0:
+ raise ValueError('Playlist start must be positive')
+ if opts.playlistend not in (-1, None) and opts.playlistend < opts.playliststart:
+ raise ValueError('Playlist end must be greater than playlist start')
+ if opts.extractaudio:
+ if opts.audioformat not in ['best', 'aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']:
+ parser.error('invalid audio format specified')
+ if opts.audioquality:
+ opts.audioquality = opts.audioquality.strip('k').strip('K')
+ if not opts.audioquality.isdigit():
+ parser.error('invalid audio quality specified')
+ if opts.recodevideo is not None:
+ if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg', 'mkv', 'avi']:
+ parser.error('invalid video recode format specified')
+ if opts.convertsubtitles is not None:
+ if opts.convertsubtitles not in ['srt', 'vtt', 'ass', 'lrc']:
+ parser.error('invalid subtitle format specified')
+
+ if opts.date is not None:
+ date = DateRange.day(opts.date)
+ else:
+ date = DateRange(opts.dateafter, opts.datebefore)
+
+ # Do not download videos when there are audio-only formats
+ if opts.extractaudio and not opts.keepvideo and opts.format is None:
+ opts.format = 'bestaudio/best'
+
+ # --all-sub automatically sets --write-sub if --write-auto-sub is not given
+ # this was the old behaviour if only --all-sub was given.
+ if opts.allsubtitles and not opts.writeautomaticsub:
+ opts.writesubtitles = True
+
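+ # Pick the output template: an explicit --output wins, then the legacy
+ # --title/--id/--auto-number combinations, and finally DEFAULT_OUTTMPL.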
+ outtmpl = ((opts.outtmpl is not None and opts.outtmpl)
+ or (opts.format == '-1' and opts.usetitle and '%(title)s-%(id)s-%(format)s.%(ext)s')
+ or (opts.format == '-1' and '%(id)s-%(format)s.%(ext)s')
+ or (opts.usetitle and opts.autonumber and '%(autonumber)s-%(title)s-%(id)s.%(ext)s')
+ or (opts.usetitle and '%(title)s-%(id)s.%(ext)s')
+ or (opts.useid and '%(id)s.%(ext)s')
+ or (opts.autonumber and '%(autonumber)s-%(id)s.%(ext)s')
+ or DEFAULT_OUTTMPL)
+ if not os.path.splitext(outtmpl)[1] and opts.extractaudio:
+ parser.error('Cannot download a video and extract audio into the same'
+ ' file! Use "{0}.%(ext)s" instead of "{0}" as the output'
+ ' template'.format(outtmpl))
+
+ any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
+ any_printing = opts.print_json
+ download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
+
+ # PostProcessors
+ postprocessors = []
+ if opts.metafromtitle:
+ postprocessors.append({
+ 'key': 'MetadataFromTitle',
+ 'titleformat': opts.metafromtitle
+ })
+ if opts.extractaudio:
+ postprocessors.append({
+ 'key': 'FFmpegExtractAudio',
+ 'preferredcodec': opts.audioformat,
+ 'preferredquality': opts.audioquality,
+ 'nopostoverwrites': opts.nopostoverwrites,
+ })
+ if opts.recodevideo:
+ postprocessors.append({
+ 'key': 'FFmpegVideoConvertor',
+ 'preferedformat': opts.recodevideo,
+ })
+ # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and
+ # FFmpegExtractAudioPP as containers before conversion may not support
+ # metadata (3gp, webm, etc.)
+ # And this post-processor should be placed before other metadata
+ # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of
+ # extra metadata. By default ffmpeg preserves metadata applicable for both
+ # source and target containers. From this point the container won't change,
+ # so metadata can be added here.
+ if opts.addmetadata:
+ postprocessors.append({'key': 'FFmpegMetadata'})
+ if opts.convertsubtitles:
+ postprocessors.append({
+ 'key': 'FFmpegSubtitlesConvertor',
+ 'format': opts.convertsubtitles,
+ })
+ if opts.embedsubtitles:
+ postprocessors.append({
+ 'key': 'FFmpegEmbedSubtitle',
+ })
+ if opts.embedthumbnail:
+ already_have_thumbnail = opts.writethumbnail or opts.write_all_thumbnails
+ postprocessors.append({
+ 'key': 'EmbedThumbnail',
+ 'already_have_thumbnail': already_have_thumbnail
+ })
+ if not already_have_thumbnail:
+ opts.writethumbnail = True
+ # XAttrMetadataPP should be run after post-processors that may change file
+ # contents
+ if opts.xattrs:
+ postprocessors.append({'key': 'XAttrMetadata'})
+ # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
+ # If the user were able to remove the file before your postprocessor runs, it could cause problems.
+ if opts.exec_cmd:
+ postprocessors.append({
+ 'key': 'ExecAfterDownload',
+ 'exec_cmd': opts.exec_cmd,
+ })
+ external_downloader_args = None
+ if opts.external_downloader_args:
+ external_downloader_args = compat_shlex_split(opts.external_downloader_args)
+ postprocessor_args = None
+ if opts.postprocessor_args:
+ postprocessor_args = compat_shlex_split(opts.postprocessor_args)
+ match_filter = (
+ None if opts.match_filter is None
+ else match_filter_func(opts.match_filter))
+
+ ydl_opts = {
+ 'usenetrc': opts.usenetrc,
+ 'username': opts.username,
+ 'password': opts.password,
+ 'twofactor': opts.twofactor,
+ 'videopassword': opts.videopassword,
+ 'ap_mso': opts.ap_mso,
+ 'ap_username': opts.ap_username,
+ 'ap_password': opts.ap_password,
+ 'quiet': (opts.quiet or any_getting or any_printing),
+ 'no_warnings': opts.no_warnings,
+ 'forceurl': opts.geturl,
+ 'forcetitle': opts.gettitle,
+ 'forceid': opts.getid,
+ 'forcethumbnail': opts.getthumbnail,
+ 'forcedescription': opts.getdescription,
+ 'forceduration': opts.getduration,
+ 'forcefilename': opts.getfilename,
+ 'forceformat': opts.getformat,
+ 'forcejson': opts.dumpjson or opts.print_json,
+ 'dump_single_json': opts.dump_single_json,
+ 'simulate': opts.simulate or any_getting,
+ 'skip_download': opts.skip_download,
+ 'format': opts.format,
+ 'listformats': opts.listformats,
+ 'outtmpl': outtmpl,
+ 'autonumber_size': opts.autonumber_size,
+ 'autonumber_start': opts.autonumber_start,
+ 'restrictfilenames': opts.restrictfilenames,
+ 'ignoreerrors': opts.ignoreerrors,
+ 'force_generic_extractor': opts.force_generic_extractor,
+ 'ratelimit': opts.ratelimit,
+ 'nooverwrites': opts.nooverwrites,
+ 'retries': opts.retries,
+ 'fragment_retries': opts.fragment_retries,
+ 'skip_unavailable_fragments': opts.skip_unavailable_fragments,
+ 'keep_fragments': opts.keep_fragments,
+ 'buffersize': opts.buffersize,
+ 'noresizebuffer': opts.noresizebuffer,
+ 'http_chunk_size': opts.http_chunk_size,
+ 'continuedl': opts.continue_dl,
+ 'noprogress': opts.noprogress,
+ 'progress_with_newline': opts.progress_with_newline,
+ 'playliststart': opts.playliststart,
+ 'playlistend': opts.playlistend,
+ 'playlistreverse': opts.playlist_reverse,
+ 'playlistrandom': opts.playlist_random,
+ 'noplaylist': opts.noplaylist,
+ 'logtostderr': opts.outtmpl == '-',
+ 'consoletitle': opts.consoletitle,
+ 'nopart': opts.nopart,
+ 'updatetime': opts.updatetime,
+ 'writedescription': opts.writedescription,
+ 'writeannotations': opts.writeannotations,
+ 'writeinfojson': opts.writeinfojson,
+ 'writethumbnail': opts.writethumbnail,
+ 'write_all_thumbnails': opts.write_all_thumbnails,
+ 'writesubtitles': opts.writesubtitles,
+ 'writeautomaticsub': opts.writeautomaticsub,
+ 'allsubtitles': opts.allsubtitles,
+ 'listsubtitles': opts.listsubtitles,
+ 'subtitlesformat': opts.subtitlesformat,
+ 'subtitleslangs': opts.subtitleslangs,
+ 'matchtitle': decodeOption(opts.matchtitle),
+ 'rejecttitle': decodeOption(opts.rejecttitle),
+ 'max_downloads': opts.max_downloads,
+ 'prefer_free_formats': opts.prefer_free_formats,
+ 'verbose': opts.verbose,
+ 'dump_intermediate_pages': opts.dump_intermediate_pages,
+ 'write_pages': opts.write_pages,
+ 'test': opts.test,
+ 'keepvideo': opts.keepvideo,
+ 'min_filesize': opts.min_filesize,
+ 'max_filesize': opts.max_filesize,
+ 'min_views': opts.min_views,
+ 'max_views': opts.max_views,
+ 'daterange': date,
+ 'cachedir': opts.cachedir,
+ 'youtube_print_sig_code': opts.youtube_print_sig_code,
+ 'age_limit': opts.age_limit,
+ 'download_archive': download_archive_fn,
+ 'cookiefile': opts.cookiefile,
+ 'nocheckcertificate': opts.no_check_certificate,
+ 'prefer_insecure': opts.prefer_insecure,
+ 'proxy': opts.proxy,
+ 'socket_timeout': opts.socket_timeout,
+ 'bidi_workaround': opts.bidi_workaround,
+ 'debug_printtraffic': opts.debug_printtraffic,
+ 'prefer_ffmpeg': opts.prefer_ffmpeg,
+ 'include_ads': opts.include_ads,
+ 'default_search': opts.default_search,
+ 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest,
+ 'encoding': opts.encoding,
+ 'extract_flat': opts.extract_flat,
+ 'mark_watched': opts.mark_watched,
+ 'merge_output_format': opts.merge_output_format,
+ 'postprocessors': postprocessors,
+ 'fixup': opts.fixup,
+ 'source_address': opts.source_address,
+ 'call_home': opts.call_home,
+ 'sleep_interval': opts.sleep_interval,
+ 'max_sleep_interval': opts.max_sleep_interval,
+ 'external_downloader': opts.external_downloader,
+ 'list_thumbnails': opts.list_thumbnails,
+ 'playlist_items': opts.playlist_items,
+ 'xattr_set_filesize': opts.xattr_set_filesize,
+ 'match_filter': match_filter,
+ 'no_color': opts.no_color,
+ 'ffmpeg_location': opts.ffmpeg_location,
+ 'hls_prefer_native': opts.hls_prefer_native,
+ 'hls_use_mpegts': opts.hls_use_mpegts,
+ 'external_downloader_args': external_downloader_args,
+ 'postprocessor_args': postprocessor_args,
+ 'cn_verification_proxy': opts.cn_verification_proxy,
+ 'geo_verification_proxy': opts.geo_verification_proxy,
+ 'config_location': opts.config_location,
+ 'geo_bypass': opts.geo_bypass,
+ 'geo_bypass_country': opts.geo_bypass_country,
+ 'geo_bypass_ip_block': opts.geo_bypass_ip_block,
+ # just for deprecation check
+ 'autonumber': opts.autonumber if opts.autonumber is True else None,
+ 'usetitle': opts.usetitle if opts.usetitle is True else None,
+ }
+
+ with YoutubeDL(ydl_opts) as ydl:
+ # Update version
+ if opts.update_self:
+ update_self(ydl.to_screen, opts.verbose, ydl._opener)
+
+ # Remove cache dir
+ if opts.rm_cachedir:
+ ydl.cache.remove()
+
+ # Maybe do nothing
+ if (len(all_urls) < 1) and (opts.load_info_filename is None):
+ if opts.update_self or opts.rm_cachedir:
+ sys.exit()
+
+ ydl.warn_if_short_id(sys.argv[1:] if argv is None else argv)
+ parser.error(
+ 'You must provide at least one URL.\n'
+ 'Type youtube-dlc --help to see a list of all options.')
+
+ try:
+ if opts.load_info_filename is not None:
+ retcode = ydl.download_with_info_file(expand_path(opts.load_info_filename))
+ else:
+ retcode = ydl.download(all_urls)
+ except MaxDownloadsReached:
+ ydl.to_screen('--max-download limit reached, aborting.')
+ retcode = 101
+
+ sys.exit(retcode)
+
+
+def main(argv=None):
+ try:
+ _real_main(argv)
+ except DownloadError:
+ sys.exit(1)
+ except SameFileError:
+ sys.exit('ERROR: fixed output name but more than one file to download')
+ except KeyboardInterrupt:
+ sys.exit('\nERROR: Interrupted by user')
+
+
+__all__ = ['main', 'YoutubeDL', 'gen_extractors', 'list_extractors']
diff --git a/youtube_dlc/__main__.py b/youtube_dlc/__main__.py
new file mode 100644
index 000000000..0e7601686
--- /dev/null
+++ b/youtube_dlc/__main__.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+from __future__ import unicode_literals
+
+# Execute with
+# $ python youtube_dlc/__main__.py (2.6+)
+# $ python -m youtube_dlc (2.7+)
+
+import sys
+
+if __package__ is None and not hasattr(sys, 'frozen'):
+ # direct call of __main__.py
+ import os.path
+ path = os.path.realpath(os.path.abspath(__file__))
+ sys.path.insert(0, os.path.dirname(os.path.dirname(path)))
+
+import youtube_dlc
+
+if __name__ == '__main__':
+ youtube_dlc.main()
diff --git a/youtube_dl/aes.py b/youtube_dlc/aes.py
index 461bb6d41..461bb6d41 100644
--- a/youtube_dl/aes.py
+++ b/youtube_dlc/aes.py
diff --git a/youtube_dlc/cache.py b/youtube_dlc/cache.py
new file mode 100644
index 000000000..ada6aa1f2
--- /dev/null
+++ b/youtube_dlc/cache.py
@@ -0,0 +1,96 @@
+from __future__ import unicode_literals
+
+import errno
+import io
+import json
+import os
+import re
+import shutil
+import traceback
+
+from .compat import compat_getenv
+from .utils import (
+ expand_path,
+ write_json_file,
+)
+
+
+class Cache(object):
+ def __init__(self, ydl):
+ self._ydl = ydl
+
+ def _get_root_dir(self):
+ res = self._ydl.params.get('cachedir')
+ if res is None:
+ cache_root = compat_getenv('XDG_CACHE_HOME', '~/.cache')
+ res = os.path.join(cache_root, 'youtube-dlc')
+ return expand_path(res)
+
+ def _get_cache_fn(self, section, key, dtype):
+ assert re.match(r'^[a-zA-Z0-9_.-]+$', section), \
+ 'invalid section %r' % section
+ assert re.match(r'^[a-zA-Z0-9_.-]+$', key), 'invalid key %r' % key
+ return os.path.join(
+ self._get_root_dir(), section, '%s.%s' % (key, dtype))
+
+ @property
+ def enabled(self):
+ return self._ydl.params.get('cachedir') is not False
+
+ def store(self, section, key, data, dtype='json'):
+ assert dtype in ('json',)
+
+ if not self.enabled:
+ return
+
+ fn = self._get_cache_fn(section, key, dtype)
+ try:
+ try:
+ os.makedirs(os.path.dirname(fn))
+ except OSError as ose:
+ if ose.errno != errno.EEXIST:
+ raise
+ write_json_file(data, fn)
+ except Exception:
+ tb = traceback.format_exc()
+ self._ydl.report_warning(
+ 'Writing cache to %r failed: %s' % (fn, tb))
+
+ def load(self, section, key, dtype='json', default=None):
+ assert dtype in ('json',)
+
+ if not self.enabled:
+ return default
+
+ cache_fn = self._get_cache_fn(section, key, dtype)
+ try:
+ try:
+ with io.open(cache_fn, 'r', encoding='utf-8') as cachef:
+ return json.load(cachef)
+ except ValueError:
+ try:
+ file_size = os.path.getsize(cache_fn)
+ except (OSError, IOError) as oe:
+ file_size = str(oe)
+ self._ydl.report_warning(
+ 'Cache retrieval from %s failed (%s)' % (cache_fn, file_size))
+ except IOError:
+ pass # No cache available
+
+ return default
+
+ def remove(self):
+ if not self.enabled:
+ self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
+ return
+
+ cachedir = self._get_root_dir()
+ if not any((term in cachedir) for term in ('cache', 'tmp')):
+ raise Exception('Not removing directory %s - this does not look like a cache dir' % cachedir)
+
+ self._ydl.to_screen(
+ 'Removing cache dir %s .' % cachedir, skip_eol=True)
+ if os.path.exists(cachedir):
+ self._ydl.to_screen('.', skip_eol=True)
+ shutil.rmtree(cachedir)
+ self._ydl.to_screen('.')
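+
+# Illustrative usage sketch (comments only, not executed): given a YoutubeDL
+# instance `ydl`, callers persist small JSON blobs keyed by section and key;
+# the section/key/data values below are made up.
+#
+#     cache = Cache(ydl)
+#     cache.store('youtube-sigfuncs', 'js_abc123', {'sig': 'reversed'})
+#     data = cache.load('youtube-sigfuncs', 'js_abc123', default={})
+#
+# This reads/writes <cachedir>/youtube-sigfuncs/js_abc123.json, where
+# <cachedir> defaults to $XDG_CACHE_HOME/youtube-dlc (~/.cache/youtube-dlc).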
diff --git a/youtube_dlc/compat.py b/youtube_dlc/compat.py
new file mode 100644
index 000000000..1cf7efed6
--- /dev/null
+++ b/youtube_dlc/compat.py
@@ -0,0 +1,3050 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import binascii
+import collections
+import ctypes
+import email
+import getpass
+import io
+import itertools
+import optparse
+import os
+import platform
+import re
+import shlex
+import shutil
+import socket
+import struct
+import subprocess
+import sys
+import xml.etree.ElementTree
+
+
+try:
+ import urllib.request as compat_urllib_request
+except ImportError: # Python 2
+ import urllib2 as compat_urllib_request
+
+try:
+ import urllib.error as compat_urllib_error
+except ImportError: # Python 2
+ import urllib2 as compat_urllib_error
+
+try:
+ import urllib.parse as compat_urllib_parse
+except ImportError: # Python 2
+ import urllib as compat_urllib_parse
+
+try:
+ from urllib.parse import urlparse as compat_urllib_parse_urlparse
+except ImportError: # Python 2
+ from urlparse import urlparse as compat_urllib_parse_urlparse
+
+try:
+ import urllib.parse as compat_urlparse
+except ImportError: # Python 2
+ import urlparse as compat_urlparse
+
+try:
+ import urllib.response as compat_urllib_response
+except ImportError: # Python 2
+ import urllib as compat_urllib_response
+
+try:
+ import http.cookiejar as compat_cookiejar
+except ImportError: # Python 2
+ import cookielib as compat_cookiejar
+
+if sys.version_info[0] == 2:
+ class compat_cookiejar_Cookie(compat_cookiejar.Cookie):
+ def __init__(self, version, name, value, *args, **kwargs):
+ if isinstance(name, compat_str):
+ name = name.encode()
+ if isinstance(value, compat_str):
+ value = value.encode()
+ compat_cookiejar.Cookie.__init__(self, version, name, value, *args, **kwargs)
+else:
+ compat_cookiejar_Cookie = compat_cookiejar.Cookie
+
+try:
+ import http.cookies as compat_cookies
+except ImportError: # Python 2
+ import Cookie as compat_cookies
+
+try:
+ import html.entities as compat_html_entities
+except ImportError: # Python 2
+ import htmlentitydefs as compat_html_entities
+
+try: # Python >= 3.3
+ compat_html_entities_html5 = compat_html_entities.html5
+except AttributeError:
+ # Copied from CPython 3.5.1 html/entities.py
+ compat_html_entities_html5 = {
+ 'Aacute': '\xc1',
+ 'aacute': '\xe1',
+ 'Aacute;': '\xc1',
+ 'aacute;': '\xe1',
+ 'Abreve;': '\u0102',
+ 'abreve;': '\u0103',
+ 'ac;': '\u223e',
+ 'acd;': '\u223f',
+ 'acE;': '\u223e\u0333',
+ 'Acirc': '\xc2',
+ 'acirc': '\xe2',
+ 'Acirc;': '\xc2',
+ 'acirc;': '\xe2',
+ 'acute': '\xb4',
+ 'acute;': '\xb4',
+ 'Acy;': '\u0410',
+ 'acy;': '\u0430',
+ 'AElig': '\xc6',
+ 'aelig': '\xe6',
+ 'AElig;': '\xc6',
+ 'aelig;': '\xe6',
+ 'af;': '\u2061',
+ 'Afr;': '\U0001d504',
+ 'afr;': '\U0001d51e',
+ 'Agrave': '\xc0',
+ 'agrave': '\xe0',
+ 'Agrave;': '\xc0',
+ 'agrave;': '\xe0',
+ 'alefsym;': '\u2135',
+ 'aleph;': '\u2135',
+ 'Alpha;': '\u0391',
+ 'alpha;': '\u03b1',
+ 'Amacr;': '\u0100',
+ 'amacr;': '\u0101',
+ 'amalg;': '\u2a3f',
+ 'AMP': '&',
+ 'amp': '&',
+ 'AMP;': '&',
+ 'amp;': '&',
+ 'And;': '\u2a53',
+ 'and;': '\u2227',
+ 'andand;': '\u2a55',
+ 'andd;': '\u2a5c',
+ 'andslope;': '\u2a58',
+ 'andv;': '\u2a5a',
+ 'ang;': '\u2220',
+ 'ange;': '\u29a4',
+ 'angle;': '\u2220',
+ 'angmsd;': '\u2221',
+ 'angmsdaa;': '\u29a8',
+ 'angmsdab;': '\u29a9',
+ 'angmsdac;': '\u29aa',
+ 'angmsdad;': '\u29ab',
+ 'angmsdae;': '\u29ac',
+ 'angmsdaf;': '\u29ad',
+ 'angmsdag;': '\u29ae',
+ 'angmsdah;': '\u29af',
+ 'angrt;': '\u221f',
+ 'angrtvb;': '\u22be',
+ 'angrtvbd;': '\u299d',
+ 'angsph;': '\u2222',
+ 'angst;': '\xc5',
+ 'angzarr;': '\u237c',
+ 'Aogon;': '\u0104',
+ 'aogon;': '\u0105',
+ 'Aopf;': '\U0001d538',
+ 'aopf;': '\U0001d552',
+ 'ap;': '\u2248',
+ 'apacir;': '\u2a6f',
+ 'apE;': '\u2a70',
+ 'ape;': '\u224a',
+ 'apid;': '\u224b',
+ 'apos;': "'",
+ 'ApplyFunction;': '\u2061',
+ 'approx;': '\u2248',
+ 'approxeq;': '\u224a',
+ 'Aring': '\xc5',
+ 'aring': '\xe5',
+ 'Aring;': '\xc5',
+ 'aring;': '\xe5',
+ 'Ascr;': '\U0001d49c',
+ 'ascr;': '\U0001d4b6',
+ 'Assign;': '\u2254',
+ 'ast;': '*',
+ 'asymp;': '\u2248',
+ 'asympeq;': '\u224d',
+ 'Atilde': '\xc3',
+ 'atilde': '\xe3',
+ 'Atilde;': '\xc3',
+ 'atilde;': '\xe3',
+ 'Auml': '\xc4',
+ 'auml': '\xe4',
+ 'Auml;': '\xc4',
+ 'auml;': '\xe4',
+ 'awconint;': '\u2233',
+ 'awint;': '\u2a11',
+ 'backcong;': '\u224c',
+ 'backepsilon;': '\u03f6',
+ 'backprime;': '\u2035',
+ 'backsim;': '\u223d',
+ 'backsimeq;': '\u22cd',
+ 'Backslash;': '\u2216',
+ 'Barv;': '\u2ae7',
+ 'barvee;': '\u22bd',
+ 'Barwed;': '\u2306',
+ 'barwed;': '\u2305',
+ 'barwedge;': '\u2305',
+ 'bbrk;': '\u23b5',
+ 'bbrktbrk;': '\u23b6',
+ 'bcong;': '\u224c',
+ 'Bcy;': '\u0411',
+ 'bcy;': '\u0431',
+ 'bdquo;': '\u201e',
+ 'becaus;': '\u2235',
+ 'Because;': '\u2235',
+ 'because;': '\u2235',
+ 'bemptyv;': '\u29b0',
+ 'bepsi;': '\u03f6',
+ 'bernou;': '\u212c',
+ 'Bernoullis;': '\u212c',
+ 'Beta;': '\u0392',
+ 'beta;': '\u03b2',
+ 'beth;': '\u2136',
+ 'between;': '\u226c',
+ 'Bfr;': '\U0001d505',
+ 'bfr;': '\U0001d51f',
+ 'bigcap;': '\u22c2',
+ 'bigcirc;': '\u25ef',
+ 'bigcup;': '\u22c3',
+ 'bigodot;': '\u2a00',
+ 'bigoplus;': '\u2a01',
+ 'bigotimes;': '\u2a02',
+ 'bigsqcup;': '\u2a06',
+ 'bigstar;': '\u2605',
+ 'bigtriangledown;': '\u25bd',
+ 'bigtriangleup;': '\u25b3',
+ 'biguplus;': '\u2a04',
+ 'bigvee;': '\u22c1',
+ 'bigwedge;': '\u22c0',
+ 'bkarow;': '\u290d',
+ 'blacklozenge;': '\u29eb',
+ 'blacksquare;': '\u25aa',
+ 'blacktriangle;': '\u25b4',
+ 'blacktriangledown;': '\u25be',
+ 'blacktriangleleft;': '\u25c2',
+ 'blacktriangleright;': '\u25b8',
+ 'blank;': '\u2423',
+ 'blk12;': '\u2592',
+ 'blk14;': '\u2591',
+ 'blk34;': '\u2593',
+ 'block;': '\u2588',
+ 'bne;': '=\u20e5',
+ 'bnequiv;': '\u2261\u20e5',
+ 'bNot;': '\u2aed',
+ 'bnot;': '\u2310',
+ 'Bopf;': '\U0001d539',
+ 'bopf;': '\U0001d553',
+ 'bot;': '\u22a5',
+ 'bottom;': '\u22a5',
+ 'bowtie;': '\u22c8',
+ 'boxbox;': '\u29c9',
+ 'boxDL;': '\u2557',
+ 'boxDl;': '\u2556',
+ 'boxdL;': '\u2555',
+ 'boxdl;': '\u2510',
+ 'boxDR;': '\u2554',
+ 'boxDr;': '\u2553',
+ 'boxdR;': '\u2552',
+ 'boxdr;': '\u250c',
+ 'boxH;': '\u2550',
+ 'boxh;': '\u2500',
+ 'boxHD;': '\u2566',
+ 'boxHd;': '\u2564',
+ 'boxhD;': '\u2565',
+ 'boxhd;': '\u252c',
+ 'boxHU;': '\u2569',
+ 'boxHu;': '\u2567',
+ 'boxhU;': '\u2568',
+ 'boxhu;': '\u2534',
+ 'boxminus;': '\u229f',
+ 'boxplus;': '\u229e',
+ 'boxtimes;': '\u22a0',
+ 'boxUL;': '\u255d',
+ 'boxUl;': '\u255c',
+ 'boxuL;': '\u255b',
+ 'boxul;': '\u2518',
+ 'boxUR;': '\u255a',
+ 'boxUr;': '\u2559',
+ 'boxuR;': '\u2558',
+ 'boxur;': '\u2514',
+ 'boxV;': '\u2551',
+ 'boxv;': '\u2502',
+ 'boxVH;': '\u256c',
+ 'boxVh;': '\u256b',
+ 'boxvH;': '\u256a',
+ 'boxvh;': '\u253c',
+ 'boxVL;': '\u2563',
+ 'boxVl;': '\u2562',
+ 'boxvL;': '\u2561',
+ 'boxvl;': '\u2524',
+ 'boxVR;': '\u2560',
+ 'boxVr;': '\u255f',
+ 'boxvR;': '\u255e',
+ 'boxvr;': '\u251c',
+ 'bprime;': '\u2035',
+ 'Breve;': '\u02d8',
+ 'breve;': '\u02d8',
+ 'brvbar': '\xa6',
+ 'brvbar;': '\xa6',
+ 'Bscr;': '\u212c',
+ 'bscr;': '\U0001d4b7',
+ 'bsemi;': '\u204f',
+ 'bsim;': '\u223d',
+ 'bsime;': '\u22cd',
+ 'bsol;': '\\',
+ 'bsolb;': '\u29c5',
+ 'bsolhsub;': '\u27c8',
+ 'bull;': '\u2022',
+ 'bullet;': '\u2022',
+ 'bump;': '\u224e',
+ 'bumpE;': '\u2aae',
+ 'bumpe;': '\u224f',
+ 'Bumpeq;': '\u224e',
+ 'bumpeq;': '\u224f',
+ 'Cacute;': '\u0106',
+ 'cacute;': '\u0107',
+ 'Cap;': '\u22d2',
+ 'cap;': '\u2229',
+ 'capand;': '\u2a44',
+ 'capbrcup;': '\u2a49',
+ 'capcap;': '\u2a4b',
+ 'capcup;': '\u2a47',
+ 'capdot;': '\u2a40',
+ 'CapitalDifferentialD;': '\u2145',
+ 'caps;': '\u2229\ufe00',
+ 'caret;': '\u2041',
+ 'caron;': '\u02c7',
+ 'Cayleys;': '\u212d',
+ 'ccaps;': '\u2a4d',
+ 'Ccaron;': '\u010c',
+ 'ccaron;': '\u010d',
+ 'Ccedil': '\xc7',
+ 'ccedil': '\xe7',
+ 'Ccedil;': '\xc7',
+ 'ccedil;': '\xe7',
+ 'Ccirc;': '\u0108',
+ 'ccirc;': '\u0109',
+ 'Cconint;': '\u2230',
+ 'ccups;': '\u2a4c',
+ 'ccupssm;': '\u2a50',
+ 'Cdot;': '\u010a',
+ 'cdot;': '\u010b',
+ 'cedil': '\xb8',
+ 'cedil;': '\xb8',
+ 'Cedilla;': '\xb8',
+ 'cemptyv;': '\u29b2',
+ 'cent': '\xa2',
+ 'cent;': '\xa2',
+ 'CenterDot;': '\xb7',
+ 'centerdot;': '\xb7',
+ 'Cfr;': '\u212d',
+ 'cfr;': '\U0001d520',
+ 'CHcy;': '\u0427',
+ 'chcy;': '\u0447',
+ 'check;': '\u2713',
+ 'checkmark;': '\u2713',
+ 'Chi;': '\u03a7',
+ 'chi;': '\u03c7',
+ 'cir;': '\u25cb',
+ 'circ;': '\u02c6',
+ 'circeq;': '\u2257',
+ 'circlearrowleft;': '\u21ba',
+ 'circlearrowright;': '\u21bb',
+ 'circledast;': '\u229b',
+ 'circledcirc;': '\u229a',
+ 'circleddash;': '\u229d',
+ 'CircleDot;': '\u2299',
+ 'circledR;': '\xae',
+ 'circledS;': '\u24c8',
+ 'CircleMinus;': '\u2296',
+ 'CirclePlus;': '\u2295',
+ 'CircleTimes;': '\u2297',
+ 'cirE;': '\u29c3',
+ 'cire;': '\u2257',
+ 'cirfnint;': '\u2a10',
+ 'cirmid;': '\u2aef',
+ 'cirscir;': '\u29c2',
+ 'ClockwiseContourIntegral;': '\u2232',
+ 'CloseCurlyDoubleQuote;': '\u201d',
+ 'CloseCurlyQuote;': '\u2019',
+ 'clubs;': '\u2663',
+ 'clubsuit;': '\u2663',
+ 'Colon;': '\u2237',
+ 'colon;': ':',
+ 'Colone;': '\u2a74',
+ 'colone;': '\u2254',
+ 'coloneq;': '\u2254',
+ 'comma;': ',',
+ 'commat;': '@',
+ 'comp;': '\u2201',
+ 'compfn;': '\u2218',
+ 'complement;': '\u2201',
+ 'complexes;': '\u2102',
+ 'cong;': '\u2245',
+ 'congdot;': '\u2a6d',
+ 'Congruent;': '\u2261',
+ 'Conint;': '\u222f',
+ 'conint;': '\u222e',
+ 'ContourIntegral;': '\u222e',
+ 'Copf;': '\u2102',
+ 'copf;': '\U0001d554',
+ 'coprod;': '\u2210',
+ 'Coproduct;': '\u2210',
+ 'COPY': '\xa9',
+ 'copy': '\xa9',
+ 'COPY;': '\xa9',
+ 'copy;': '\xa9',
+ 'copysr;': '\u2117',
+ 'CounterClockwiseContourIntegral;': '\u2233',
+ 'crarr;': '\u21b5',
+ 'Cross;': '\u2a2f',
+ 'cross;': '\u2717',
+ 'Cscr;': '\U0001d49e',
+ 'cscr;': '\U0001d4b8',
+ 'csub;': '\u2acf',
+ 'csube;': '\u2ad1',
+ 'csup;': '\u2ad0',
+ 'csupe;': '\u2ad2',
+ 'ctdot;': '\u22ef',
+ 'cudarrl;': '\u2938',
+ 'cudarrr;': '\u2935',
+ 'cuepr;': '\u22de',
+ 'cuesc;': '\u22df',
+ 'cularr;': '\u21b6',
+ 'cularrp;': '\u293d',
+ 'Cup;': '\u22d3',
+ 'cup;': '\u222a',
+ 'cupbrcap;': '\u2a48',
+ 'CupCap;': '\u224d',
+ 'cupcap;': '\u2a46',
+ 'cupcup;': '\u2a4a',
+ 'cupdot;': '\u228d',
+ 'cupor;': '\u2a45',
+ 'cups;': '\u222a\ufe00',
+ 'curarr;': '\u21b7',
+ 'curarrm;': '\u293c',
+ 'curlyeqprec;': '\u22de',
+ 'curlyeqsucc;': '\u22df',
+ 'curlyvee;': '\u22ce',
+ 'curlywedge;': '\u22cf',
+ 'curren': '\xa4',
+ 'curren;': '\xa4',
+ 'curvearrowleft;': '\u21b6',
+ 'curvearrowright;': '\u21b7',
+ 'cuvee;': '\u22ce',
+ 'cuwed;': '\u22cf',
+ 'cwconint;': '\u2232',
+ 'cwint;': '\u2231',
+ 'cylcty;': '\u232d',
+ 'Dagger;': '\u2021',
+ 'dagger;': '\u2020',
+ 'daleth;': '\u2138',
+ 'Darr;': '\u21a1',
+ 'dArr;': '\u21d3',
+ 'darr;': '\u2193',
+ 'dash;': '\u2010',
+ 'Dashv;': '\u2ae4',
+ 'dashv;': '\u22a3',
+ 'dbkarow;': '\u290f',
+ 'dblac;': '\u02dd',
+ 'Dcaron;': '\u010e',
+ 'dcaron;': '\u010f',
+ 'Dcy;': '\u0414',
+ 'dcy;': '\u0434',
+ 'DD;': '\u2145',
+ 'dd;': '\u2146',
+ 'ddagger;': '\u2021',
+ 'ddarr;': '\u21ca',
+ 'DDotrahd;': '\u2911',
+ 'ddotseq;': '\u2a77',
+ 'deg': '\xb0',
+ 'deg;': '\xb0',
+ 'Del;': '\u2207',
+ 'Delta;': '\u0394',
+ 'delta;': '\u03b4',
+ 'demptyv;': '\u29b1',
+ 'dfisht;': '\u297f',
+ 'Dfr;': '\U0001d507',
+ 'dfr;': '\U0001d521',
+ 'dHar;': '\u2965',
+ 'dharl;': '\u21c3',
+ 'dharr;': '\u21c2',
+ 'DiacriticalAcute;': '\xb4',
+ 'DiacriticalDot;': '\u02d9',
+ 'DiacriticalDoubleAcute;': '\u02dd',
+ 'DiacriticalGrave;': '`',
+ 'DiacriticalTilde;': '\u02dc',
+ 'diam;': '\u22c4',
+ 'Diamond;': '\u22c4',
+ 'diamond;': '\u22c4',
+ 'diamondsuit;': '\u2666',
+ 'diams;': '\u2666',
+ 'die;': '\xa8',
+ 'DifferentialD;': '\u2146',
+ 'digamma;': '\u03dd',
+ 'disin;': '\u22f2',
+ 'div;': '\xf7',
+ 'divide': '\xf7',
+ 'divide;': '\xf7',
+ 'divideontimes;': '\u22c7',
+ 'divonx;': '\u22c7',
+ 'DJcy;': '\u0402',
+ 'djcy;': '\u0452',
+ 'dlcorn;': '\u231e',
+ 'dlcrop;': '\u230d',
+ 'dollar;': '$',
+ 'Dopf;': '\U0001d53b',
+ 'dopf;': '\U0001d555',
+ 'Dot;': '\xa8',
+ 'dot;': '\u02d9',
+ 'DotDot;': '\u20dc',
+ 'doteq;': '\u2250',
+ 'doteqdot;': '\u2251',
+ 'DotEqual;': '\u2250',
+ 'dotminus;': '\u2238',
+ 'dotplus;': '\u2214',
+ 'dotsquare;': '\u22a1',
+ 'doublebarwedge;': '\u2306',
+ 'DoubleContourIntegral;': '\u222f',
+ 'DoubleDot;': '\xa8',
+ 'DoubleDownArrow;': '\u21d3',
+ 'DoubleLeftArrow;': '\u21d0',
+ 'DoubleLeftRightArrow;': '\u21d4',
+ 'DoubleLeftTee;': '\u2ae4',
+ 'DoubleLongLeftArrow;': '\u27f8',
+ 'DoubleLongLeftRightArrow;': '\u27fa',
+ 'DoubleLongRightArrow;': '\u27f9',
+ 'DoubleRightArrow;': '\u21d2',
+ 'DoubleRightTee;': '\u22a8',
+ 'DoubleUpArrow;': '\u21d1',
+ 'DoubleUpDownArrow;': '\u21d5',
+ 'DoubleVerticalBar;': '\u2225',
+ 'DownArrow;': '\u2193',
+ 'Downarrow;': '\u21d3',
+ 'downarrow;': '\u2193',
+ 'DownArrowBar;': '\u2913',
+ 'DownArrowUpArrow;': '\u21f5',
+ 'DownBreve;': '\u0311',
+ 'downdownarrows;': '\u21ca',
+ 'downharpoonleft;': '\u21c3',
+ 'downharpoonright;': '\u21c2',
+ 'DownLeftRightVector;': '\u2950',
+ 'DownLeftTeeVector;': '\u295e',
+ 'DownLeftVector;': '\u21bd',
+ 'DownLeftVectorBar;': '\u2956',
+ 'DownRightTeeVector;': '\u295f',
+ 'DownRightVector;': '\u21c1',
+ 'DownRightVectorBar;': '\u2957',
+ 'DownTee;': '\u22a4',
+ 'DownTeeArrow;': '\u21a7',
+ 'drbkarow;': '\u2910',
+ 'drcorn;': '\u231f',
+ 'drcrop;': '\u230c',
+ 'Dscr;': '\U0001d49f',
+ 'dscr;': '\U0001d4b9',
+ 'DScy;': '\u0405',
+ 'dscy;': '\u0455',
+ 'dsol;': '\u29f6',
+ 'Dstrok;': '\u0110',
+ 'dstrok;': '\u0111',
+ 'dtdot;': '\u22f1',
+ 'dtri;': '\u25bf',
+ 'dtrif;': '\u25be',
+ 'duarr;': '\u21f5',
+ 'duhar;': '\u296f',
+ 'dwangle;': '\u29a6',
+ 'DZcy;': '\u040f',
+ 'dzcy;': '\u045f',
+ 'dzigrarr;': '\u27ff',
+ 'Eacute': '\xc9',
+ 'eacute': '\xe9',
+ 'Eacute;': '\xc9',
+ 'eacute;': '\xe9',
+ 'easter;': '\u2a6e',
+ 'Ecaron;': '\u011a',
+ 'ecaron;': '\u011b',
+ 'ecir;': '\u2256',
+ 'Ecirc': '\xca',
+ 'ecirc': '\xea',
+ 'Ecirc;': '\xca',
+ 'ecirc;': '\xea',
+ 'ecolon;': '\u2255',
+ 'Ecy;': '\u042d',
+ 'ecy;': '\u044d',
+ 'eDDot;': '\u2a77',
+ 'Edot;': '\u0116',
+ 'eDot;': '\u2251',
+ 'edot;': '\u0117',
+ 'ee;': '\u2147',
+ 'efDot;': '\u2252',
+ 'Efr;': '\U0001d508',
+ 'efr;': '\U0001d522',
+ 'eg;': '\u2a9a',
+ 'Egrave': '\xc8',
+ 'egrave': '\xe8',
+ 'Egrave;': '\xc8',
+ 'egrave;': '\xe8',
+ 'egs;': '\u2a96',
+ 'egsdot;': '\u2a98',
+ 'el;': '\u2a99',
+ 'Element;': '\u2208',
+ 'elinters;': '\u23e7',
+ 'ell;': '\u2113',
+ 'els;': '\u2a95',
+ 'elsdot;': '\u2a97',
+ 'Emacr;': '\u0112',
+ 'emacr;': '\u0113',
+ 'empty;': '\u2205',
+ 'emptyset;': '\u2205',
+ 'EmptySmallSquare;': '\u25fb',
+ 'emptyv;': '\u2205',
+ 'EmptyVerySmallSquare;': '\u25ab',
+ 'emsp13;': '\u2004',
+ 'emsp14;': '\u2005',
+ 'emsp;': '\u2003',
+ 'ENG;': '\u014a',
+ 'eng;': '\u014b',
+ 'ensp;': '\u2002',
+ 'Eogon;': '\u0118',
+ 'eogon;': '\u0119',
+ 'Eopf;': '\U0001d53c',
+ 'eopf;': '\U0001d556',
+ 'epar;': '\u22d5',
+ 'eparsl;': '\u29e3',
+ 'eplus;': '\u2a71',
+ 'epsi;': '\u03b5',
+ 'Epsilon;': '\u0395',
+ 'epsilon;': '\u03b5',
+ 'epsiv;': '\u03f5',
+ 'eqcirc;': '\u2256',
+ 'eqcolon;': '\u2255',
+ 'eqsim;': '\u2242',
+ 'eqslantgtr;': '\u2a96',
+ 'eqslantless;': '\u2a95',
+ 'Equal;': '\u2a75',
+ 'equals;': '=',
+ 'EqualTilde;': '\u2242',
+ 'equest;': '\u225f',
+ 'Equilibrium;': '\u21cc',
+ 'equiv;': '\u2261',
+ 'equivDD;': '\u2a78',
+ 'eqvparsl;': '\u29e5',
+ 'erarr;': '\u2971',
+ 'erDot;': '\u2253',
+ 'Escr;': '\u2130',
+ 'escr;': '\u212f',
+ 'esdot;': '\u2250',
+ 'Esim;': '\u2a73',
+ 'esim;': '\u2242',
+ 'Eta;': '\u0397',
+ 'eta;': '\u03b7',
+ 'ETH': '\xd0',
+ 'eth': '\xf0',
+ 'ETH;': '\xd0',
+ 'eth;': '\xf0',
+ 'Euml': '\xcb',
+ 'euml': '\xeb',
+ 'Euml;': '\xcb',
+ 'euml;': '\xeb',
+ 'euro;': '\u20ac',
+ 'excl;': '!',
+ 'exist;': '\u2203',
+ 'Exists;': '\u2203',
+ 'expectation;': '\u2130',
+ 'ExponentialE;': '\u2147',
+ 'exponentiale;': '\u2147',
+ 'fallingdotseq;': '\u2252',
+ 'Fcy;': '\u0424',
+ 'fcy;': '\u0444',
+ 'female;': '\u2640',
+ 'ffilig;': '\ufb03',
+ 'fflig;': '\ufb00',
+ 'ffllig;': '\ufb04',
+ 'Ffr;': '\U0001d509',
+ 'ffr;': '\U0001d523',
+ 'filig;': '\ufb01',
+ 'FilledSmallSquare;': '\u25fc',
+ 'FilledVerySmallSquare;': '\u25aa',
+ 'fjlig;': 'fj',
+ 'flat;': '\u266d',
+ 'fllig;': '\ufb02',
+ 'fltns;': '\u25b1',
+ 'fnof;': '\u0192',
+ 'Fopf;': '\U0001d53d',
+ 'fopf;': '\U0001d557',
+ 'ForAll;': '\u2200',
+ 'forall;': '\u2200',
+ 'fork;': '\u22d4',
+ 'forkv;': '\u2ad9',
+ 'Fouriertrf;': '\u2131',
+ 'fpartint;': '\u2a0d',
+ 'frac12': '\xbd',
+ 'frac12;': '\xbd',
+ 'frac13;': '\u2153',
+ 'frac14': '\xbc',
+ 'frac14;': '\xbc',
+ 'frac15;': '\u2155',
+ 'frac16;': '\u2159',
+ 'frac18;': '\u215b',
+ 'frac23;': '\u2154',
+ 'frac25;': '\u2156',
+ 'frac34': '\xbe',
+ 'frac34;': '\xbe',
+ 'frac35;': '\u2157',
+ 'frac38;': '\u215c',
+ 'frac45;': '\u2158',
+ 'frac56;': '\u215a',
+ 'frac58;': '\u215d',
+ 'frac78;': '\u215e',
+ 'frasl;': '\u2044',
+ 'frown;': '\u2322',
+ 'Fscr;': '\u2131',
+ 'fscr;': '\U0001d4bb',
+ 'gacute;': '\u01f5',
+ 'Gamma;': '\u0393',
+ 'gamma;': '\u03b3',
+ 'Gammad;': '\u03dc',
+ 'gammad;': '\u03dd',
+ 'gap;': '\u2a86',
+ 'Gbreve;': '\u011e',
+ 'gbreve;': '\u011f',
+ 'Gcedil;': '\u0122',
+ 'Gcirc;': '\u011c',
+ 'gcirc;': '\u011d',
+ 'Gcy;': '\u0413',
+ 'gcy;': '\u0433',
+ 'Gdot;': '\u0120',
+ 'gdot;': '\u0121',
+ 'gE;': '\u2267',
+ 'ge;': '\u2265',
+ 'gEl;': '\u2a8c',
+ 'gel;': '\u22db',
+ 'geq;': '\u2265',
+ 'geqq;': '\u2267',
+ 'geqslant;': '\u2a7e',
+ 'ges;': '\u2a7e',
+ 'gescc;': '\u2aa9',
+ 'gesdot;': '\u2a80',
+ 'gesdoto;': '\u2a82',
+ 'gesdotol;': '\u2a84',
+ 'gesl;': '\u22db\ufe00',
+ 'gesles;': '\u2a94',
+ 'Gfr;': '\U0001d50a',
+ 'gfr;': '\U0001d524',
+ 'Gg;': '\u22d9',
+ 'gg;': '\u226b',
+ 'ggg;': '\u22d9',
+ 'gimel;': '\u2137',
+ 'GJcy;': '\u0403',
+ 'gjcy;': '\u0453',
+ 'gl;': '\u2277',
+ 'gla;': '\u2aa5',
+ 'glE;': '\u2a92',
+ 'glj;': '\u2aa4',
+ 'gnap;': '\u2a8a',
+ 'gnapprox;': '\u2a8a',
+ 'gnE;': '\u2269',
+ 'gne;': '\u2a88',
+ 'gneq;': '\u2a88',
+ 'gneqq;': '\u2269',
+ 'gnsim;': '\u22e7',
+ 'Gopf;': '\U0001d53e',
+ 'gopf;': '\U0001d558',
+ 'grave;': '`',
+ 'GreaterEqual;': '\u2265',
+ 'GreaterEqualLess;': '\u22db',
+ 'GreaterFullEqual;': '\u2267',
+ 'GreaterGreater;': '\u2aa2',
+ 'GreaterLess;': '\u2277',
+ 'GreaterSlantEqual;': '\u2a7e',
+ 'GreaterTilde;': '\u2273',
+ 'Gscr;': '\U0001d4a2',
+ 'gscr;': '\u210a',
+ 'gsim;': '\u2273',
+ 'gsime;': '\u2a8e',
+ 'gsiml;': '\u2a90',
+ 'GT': '>',
+ 'gt': '>',
+ 'GT;': '>',
+ 'Gt;': '\u226b',
+ 'gt;': '>',
+ 'gtcc;': '\u2aa7',
+ 'gtcir;': '\u2a7a',
+ 'gtdot;': '\u22d7',
+ 'gtlPar;': '\u2995',
+ 'gtquest;': '\u2a7c',
+ 'gtrapprox;': '\u2a86',
+ 'gtrarr;': '\u2978',
+ 'gtrdot;': '\u22d7',
+ 'gtreqless;': '\u22db',
+ 'gtreqqless;': '\u2a8c',
+ 'gtrless;': '\u2277',
+ 'gtrsim;': '\u2273',
+ 'gvertneqq;': '\u2269\ufe00',
+ 'gvnE;': '\u2269\ufe00',
+ 'Hacek;': '\u02c7',
+ 'hairsp;': '\u200a',
+ 'half;': '\xbd',
+ 'hamilt;': '\u210b',
+ 'HARDcy;': '\u042a',
+ 'hardcy;': '\u044a',
+ 'hArr;': '\u21d4',
+ 'harr;': '\u2194',
+ 'harrcir;': '\u2948',
+ 'harrw;': '\u21ad',
+ 'Hat;': '^',
+ 'hbar;': '\u210f',
+ 'Hcirc;': '\u0124',
+ 'hcirc;': '\u0125',
+ 'hearts;': '\u2665',
+ 'heartsuit;': '\u2665',
+ 'hellip;': '\u2026',
+ 'hercon;': '\u22b9',
+ 'Hfr;': '\u210c',
+ 'hfr;': '\U0001d525',
+ 'HilbertSpace;': '\u210b',
+ 'hksearow;': '\u2925',
+ 'hkswarow;': '\u2926',
+ 'hoarr;': '\u21ff',
+ 'homtht;': '\u223b',
+ 'hookleftarrow;': '\u21a9',
+ 'hookrightarrow;': '\u21aa',
+ 'Hopf;': '\u210d',
+ 'hopf;': '\U0001d559',
+ 'horbar;': '\u2015',
+ 'HorizontalLine;': '\u2500',
+ 'Hscr;': '\u210b',
+ 'hscr;': '\U0001d4bd',
+ 'hslash;': '\u210f',
+ 'Hstrok;': '\u0126',
+ 'hstrok;': '\u0127',
+ 'HumpDownHump;': '\u224e',
+ 'HumpEqual;': '\u224f',
+ 'hybull;': '\u2043',
+ 'hyphen;': '\u2010',
+ 'Iacute': '\xcd',
+ 'iacute': '\xed',
+ 'Iacute;': '\xcd',
+ 'iacute;': '\xed',
+ 'ic;': '\u2063',
+ 'Icirc': '\xce',
+ 'icirc': '\xee',
+ 'Icirc;': '\xce',
+ 'icirc;': '\xee',
+ 'Icy;': '\u0418',
+ 'icy;': '\u0438',
+ 'Idot;': '\u0130',
+ 'IEcy;': '\u0415',
+ 'iecy;': '\u0435',
+ 'iexcl': '\xa1',
+ 'iexcl;': '\xa1',
+ 'iff;': '\u21d4',
+ 'Ifr;': '\u2111',
+ 'ifr;': '\U0001d526',
+ 'Igrave': '\xcc',
+ 'igrave': '\xec',
+ 'Igrave;': '\xcc',
+ 'igrave;': '\xec',
+ 'ii;': '\u2148',
+ 'iiiint;': '\u2a0c',
+ 'iiint;': '\u222d',
+ 'iinfin;': '\u29dc',
+ 'iiota;': '\u2129',
+ 'IJlig;': '\u0132',
+ 'ijlig;': '\u0133',
+ 'Im;': '\u2111',
+ 'Imacr;': '\u012a',
+ 'imacr;': '\u012b',
+ 'image;': '\u2111',
+ 'ImaginaryI;': '\u2148',
+ 'imagline;': '\u2110',
+ 'imagpart;': '\u2111',
+ 'imath;': '\u0131',
+ 'imof;': '\u22b7',
+ 'imped;': '\u01b5',
+ 'Implies;': '\u21d2',
+ 'in;': '\u2208',
+ 'incare;': '\u2105',
+ 'infin;': '\u221e',
+ 'infintie;': '\u29dd',
+ 'inodot;': '\u0131',
+ 'Int;': '\u222c',
+ 'int;': '\u222b',
+ 'intcal;': '\u22ba',
+ 'integers;': '\u2124',
+ 'Integral;': '\u222b',
+ 'intercal;': '\u22ba',
+ 'Intersection;': '\u22c2',
+ 'intlarhk;': '\u2a17',
+ 'intprod;': '\u2a3c',
+ 'InvisibleComma;': '\u2063',
+ 'InvisibleTimes;': '\u2062',
+ 'IOcy;': '\u0401',
+ 'iocy;': '\u0451',
+ 'Iogon;': '\u012e',
+ 'iogon;': '\u012f',
+ 'Iopf;': '\U0001d540',
+ 'iopf;': '\U0001d55a',
+ 'Iota;': '\u0399',
+ 'iota;': '\u03b9',
+ 'iprod;': '\u2a3c',
+ 'iquest': '\xbf',
+ 'iquest;': '\xbf',
+ 'Iscr;': '\u2110',
+ 'iscr;': '\U0001d4be',
+ 'isin;': '\u2208',
+ 'isindot;': '\u22f5',
+ 'isinE;': '\u22f9',
+ 'isins;': '\u22f4',
+ 'isinsv;': '\u22f3',
+ 'isinv;': '\u2208',
+ 'it;': '\u2062',
+ 'Itilde;': '\u0128',
+ 'itilde;': '\u0129',
+ 'Iukcy;': '\u0406',
+ 'iukcy;': '\u0456',
+ 'Iuml': '\xcf',
+ 'iuml': '\xef',
+ 'Iuml;': '\xcf',
+ 'iuml;': '\xef',
+ 'Jcirc;': '\u0134',
+ 'jcirc;': '\u0135',
+ 'Jcy;': '\u0419',
+ 'jcy;': '\u0439',
+ 'Jfr;': '\U0001d50d',
+ 'jfr;': '\U0001d527',
+ 'jmath;': '\u0237',
+ 'Jopf;': '\U0001d541',
+ 'jopf;': '\U0001d55b',
+ 'Jscr;': '\U0001d4a5',
+ 'jscr;': '\U0001d4bf',
+ 'Jsercy;': '\u0408',
+ 'jsercy;': '\u0458',
+ 'Jukcy;': '\u0404',
+ 'jukcy;': '\u0454',
+ 'Kappa;': '\u039a',
+ 'kappa;': '\u03ba',
+ 'kappav;': '\u03f0',
+ 'Kcedil;': '\u0136',
+ 'kcedil;': '\u0137',
+ 'Kcy;': '\u041a',
+ 'kcy;': '\u043a',
+ 'Kfr;': '\U0001d50e',
+ 'kfr;': '\U0001d528',
+ 'kgreen;': '\u0138',
+ 'KHcy;': '\u0425',
+ 'khcy;': '\u0445',
+ 'KJcy;': '\u040c',
+ 'kjcy;': '\u045c',
+ 'Kopf;': '\U0001d542',
+ 'kopf;': '\U0001d55c',
+ 'Kscr;': '\U0001d4a6',
+ 'kscr;': '\U0001d4c0',
+ 'lAarr;': '\u21da',
+ 'Lacute;': '\u0139',
+ 'lacute;': '\u013a',
+ 'laemptyv;': '\u29b4',
+ 'lagran;': '\u2112',
+ 'Lambda;': '\u039b',
+ 'lambda;': '\u03bb',
+ 'Lang;': '\u27ea',
+ 'lang;': '\u27e8',
+ 'langd;': '\u2991',
+ 'langle;': '\u27e8',
+ 'lap;': '\u2a85',
+ 'Laplacetrf;': '\u2112',
+ 'laquo': '\xab',
+ 'laquo;': '\xab',
+ 'Larr;': '\u219e',
+ 'lArr;': '\u21d0',
+ 'larr;': '\u2190',
+ 'larrb;': '\u21e4',
+ 'larrbfs;': '\u291f',
+ 'larrfs;': '\u291d',
+ 'larrhk;': '\u21a9',
+ 'larrlp;': '\u21ab',
+ 'larrpl;': '\u2939',
+ 'larrsim;': '\u2973',
+ 'larrtl;': '\u21a2',
+ 'lat;': '\u2aab',
+ 'lAtail;': '\u291b',
+ 'latail;': '\u2919',
+ 'late;': '\u2aad',
+ 'lates;': '\u2aad\ufe00',
+ 'lBarr;': '\u290e',
+ 'lbarr;': '\u290c',
+ 'lbbrk;': '\u2772',
+ 'lbrace;': '{',
+ 'lbrack;': '[',
+ 'lbrke;': '\u298b',
+ 'lbrksld;': '\u298f',
+ 'lbrkslu;': '\u298d',
+ 'Lcaron;': '\u013d',
+ 'lcaron;': '\u013e',
+ 'Lcedil;': '\u013b',
+ 'lcedil;': '\u013c',
+ 'lceil;': '\u2308',
+ 'lcub;': '{',
+ 'Lcy;': '\u041b',
+ 'lcy;': '\u043b',
+ 'ldca;': '\u2936',
+ 'ldquo;': '\u201c',
+ 'ldquor;': '\u201e',
+ 'ldrdhar;': '\u2967',
+ 'ldrushar;': '\u294b',
+ 'ldsh;': '\u21b2',
+ 'lE;': '\u2266',
+ 'le;': '\u2264',
+ 'LeftAngleBracket;': '\u27e8',
+ 'LeftArrow;': '\u2190',
+ 'Leftarrow;': '\u21d0',
+ 'leftarrow;': '\u2190',
+ 'LeftArrowBar;': '\u21e4',
+ 'LeftArrowRightArrow;': '\u21c6',
+ 'leftarrowtail;': '\u21a2',
+ 'LeftCeiling;': '\u2308',
+ 'LeftDoubleBracket;': '\u27e6',
+ 'LeftDownTeeVector;': '\u2961',
+ 'LeftDownVector;': '\u21c3',
+ 'LeftDownVectorBar;': '\u2959',
+ 'LeftFloor;': '\u230a',
+ 'leftharpoondown;': '\u21bd',
+ 'leftharpoonup;': '\u21bc',
+ 'leftleftarrows;': '\u21c7',
+ 'LeftRightArrow;': '\u2194',
+ 'Leftrightarrow;': '\u21d4',
+ 'leftrightarrow;': '\u2194',
+ 'leftrightarrows;': '\u21c6',
+ 'leftrightharpoons;': '\u21cb',
+ 'leftrightsquigarrow;': '\u21ad',
+ 'LeftRightVector;': '\u294e',
+ 'LeftTee;': '\u22a3',
+ 'LeftTeeArrow;': '\u21a4',
+ 'LeftTeeVector;': '\u295a',
+ 'leftthreetimes;': '\u22cb',
+ 'LeftTriangle;': '\u22b2',
+ 'LeftTriangleBar;': '\u29cf',
+ 'LeftTriangleEqual;': '\u22b4',
+ 'LeftUpDownVector;': '\u2951',
+ 'LeftUpTeeVector;': '\u2960',
+ 'LeftUpVector;': '\u21bf',
+ 'LeftUpVectorBar;': '\u2958',
+ 'LeftVector;': '\u21bc',
+ 'LeftVectorBar;': '\u2952',
+ 'lEg;': '\u2a8b',
+ 'leg;': '\u22da',
+ 'leq;': '\u2264',
+ 'leqq;': '\u2266',
+ 'leqslant;': '\u2a7d',
+ 'les;': '\u2a7d',
+ 'lescc;': '\u2aa8',
+ 'lesdot;': '\u2a7f',
+ 'lesdoto;': '\u2a81',
+ 'lesdotor;': '\u2a83',
+ 'lesg;': '\u22da\ufe00',
+ 'lesges;': '\u2a93',
+ 'lessapprox;': '\u2a85',
+ 'lessdot;': '\u22d6',
+ 'lesseqgtr;': '\u22da',
+ 'lesseqqgtr;': '\u2a8b',
+ 'LessEqualGreater;': '\u22da',
+ 'LessFullEqual;': '\u2266',
+ 'LessGreater;': '\u2276',
+ 'lessgtr;': '\u2276',
+ 'LessLess;': '\u2aa1',
+ 'lesssim;': '\u2272',
+ 'LessSlantEqual;': '\u2a7d',
+ 'LessTilde;': '\u2272',
+ 'lfisht;': '\u297c',
+ 'lfloor;': '\u230a',
+ 'Lfr;': '\U0001d50f',
+ 'lfr;': '\U0001d529',
+ 'lg;': '\u2276',
+ 'lgE;': '\u2a91',
+ 'lHar;': '\u2962',
+ 'lhard;': '\u21bd',
+ 'lharu;': '\u21bc',
+ 'lharul;': '\u296a',
+ 'lhblk;': '\u2584',
+ 'LJcy;': '\u0409',
+ 'ljcy;': '\u0459',
+ 'Ll;': '\u22d8',
+ 'll;': '\u226a',
+ 'llarr;': '\u21c7',
+ 'llcorner;': '\u231e',
+ 'Lleftarrow;': '\u21da',
+ 'llhard;': '\u296b',
+ 'lltri;': '\u25fa',
+ 'Lmidot;': '\u013f',
+ 'lmidot;': '\u0140',
+ 'lmoust;': '\u23b0',
+ 'lmoustache;': '\u23b0',
+ 'lnap;': '\u2a89',
+ 'lnapprox;': '\u2a89',
+ 'lnE;': '\u2268',
+ 'lne;': '\u2a87',
+ 'lneq;': '\u2a87',
+ 'lneqq;': '\u2268',
+ 'lnsim;': '\u22e6',
+ 'loang;': '\u27ec',
+ 'loarr;': '\u21fd',
+ 'lobrk;': '\u27e6',
+ 'LongLeftArrow;': '\u27f5',
+ 'Longleftarrow;': '\u27f8',
+ 'longleftarrow;': '\u27f5',
+ 'LongLeftRightArrow;': '\u27f7',
+ 'Longleftrightarrow;': '\u27fa',
+ 'longleftrightarrow;': '\u27f7',
+ 'longmapsto;': '\u27fc',
+ 'LongRightArrow;': '\u27f6',
+ 'Longrightarrow;': '\u27f9',
+ 'longrightarrow;': '\u27f6',
+ 'looparrowleft;': '\u21ab',
+ 'looparrowright;': '\u21ac',
+ 'lopar;': '\u2985',
+ 'Lopf;': '\U0001d543',
+ 'lopf;': '\U0001d55d',
+ 'loplus;': '\u2a2d',
+ 'lotimes;': '\u2a34',
+ 'lowast;': '\u2217',
+ 'lowbar;': '_',
+ 'LowerLeftArrow;': '\u2199',
+ 'LowerRightArrow;': '\u2198',
+ 'loz;': '\u25ca',
+ 'lozenge;': '\u25ca',
+ 'lozf;': '\u29eb',
+ 'lpar;': '(',
+ 'lparlt;': '\u2993',
+ 'lrarr;': '\u21c6',
+ 'lrcorner;': '\u231f',
+ 'lrhar;': '\u21cb',
+ 'lrhard;': '\u296d',
+ 'lrm;': '\u200e',
+ 'lrtri;': '\u22bf',
+ 'lsaquo;': '\u2039',
+ 'Lscr;': '\u2112',
+ 'lscr;': '\U0001d4c1',
+ 'Lsh;': '\u21b0',
+ 'lsh;': '\u21b0',
+ 'lsim;': '\u2272',
+ 'lsime;': '\u2a8d',
+ 'lsimg;': '\u2a8f',
+ 'lsqb;': '[',
+ 'lsquo;': '\u2018',
+ 'lsquor;': '\u201a',
+ 'Lstrok;': '\u0141',
+ 'lstrok;': '\u0142',
+ 'LT': '<',
+ 'lt': '<',
+ 'LT;': '<',
+ 'Lt;': '\u226a',
+ 'lt;': '<',
+ 'ltcc;': '\u2aa6',
+ 'ltcir;': '\u2a79',
+ 'ltdot;': '\u22d6',
+ 'lthree;': '\u22cb',
+ 'ltimes;': '\u22c9',
+ 'ltlarr;': '\u2976',
+ 'ltquest;': '\u2a7b',
+ 'ltri;': '\u25c3',
+ 'ltrie;': '\u22b4',
+ 'ltrif;': '\u25c2',
+ 'ltrPar;': '\u2996',
+ 'lurdshar;': '\u294a',
+ 'luruhar;': '\u2966',
+ 'lvertneqq;': '\u2268\ufe00',
+ 'lvnE;': '\u2268\ufe00',
+ 'macr': '\xaf',
+ 'macr;': '\xaf',
+ 'male;': '\u2642',
+ 'malt;': '\u2720',
+ 'maltese;': '\u2720',
+ 'Map;': '\u2905',
+ 'map;': '\u21a6',
+ 'mapsto;': '\u21a6',
+ 'mapstodown;': '\u21a7',
+ 'mapstoleft;': '\u21a4',
+ 'mapstoup;': '\u21a5',
+ 'marker;': '\u25ae',
+ 'mcomma;': '\u2a29',
+ 'Mcy;': '\u041c',
+ 'mcy;': '\u043c',
+ 'mdash;': '\u2014',
+ 'mDDot;': '\u223a',
+ 'measuredangle;': '\u2221',
+ 'MediumSpace;': '\u205f',
+ 'Mellintrf;': '\u2133',
+ 'Mfr;': '\U0001d510',
+ 'mfr;': '\U0001d52a',
+ 'mho;': '\u2127',
+ 'micro': '\xb5',
+ 'micro;': '\xb5',
+ 'mid;': '\u2223',
+ 'midast;': '*',
+ 'midcir;': '\u2af0',
+ 'middot': '\xb7',
+ 'middot;': '\xb7',
+ 'minus;': '\u2212',
+ 'minusb;': '\u229f',
+ 'minusd;': '\u2238',
+ 'minusdu;': '\u2a2a',
+ 'MinusPlus;': '\u2213',
+ 'mlcp;': '\u2adb',
+ 'mldr;': '\u2026',
+ 'mnplus;': '\u2213',
+ 'models;': '\u22a7',
+ 'Mopf;': '\U0001d544',
+ 'mopf;': '\U0001d55e',
+ 'mp;': '\u2213',
+ 'Mscr;': '\u2133',
+ 'mscr;': '\U0001d4c2',
+ 'mstpos;': '\u223e',
+ 'Mu;': '\u039c',
+ 'mu;': '\u03bc',
+ 'multimap;': '\u22b8',
+ 'mumap;': '\u22b8',
+ 'nabla;': '\u2207',
+ 'Nacute;': '\u0143',
+ 'nacute;': '\u0144',
+ 'nang;': '\u2220\u20d2',
+ 'nap;': '\u2249',
+ 'napE;': '\u2a70\u0338',
+ 'napid;': '\u224b\u0338',
+ 'napos;': '\u0149',
+ 'napprox;': '\u2249',
+ 'natur;': '\u266e',
+ 'natural;': '\u266e',
+ 'naturals;': '\u2115',
+ 'nbsp': '\xa0',
+ 'nbsp;': '\xa0',
+ 'nbump;': '\u224e\u0338',
+ 'nbumpe;': '\u224f\u0338',
+ 'ncap;': '\u2a43',
+ 'Ncaron;': '\u0147',
+ 'ncaron;': '\u0148',
+ 'Ncedil;': '\u0145',
+ 'ncedil;': '\u0146',
+ 'ncong;': '\u2247',
+ 'ncongdot;': '\u2a6d\u0338',
+ 'ncup;': '\u2a42',
+ 'Ncy;': '\u041d',
+ 'ncy;': '\u043d',
+ 'ndash;': '\u2013',
+ 'ne;': '\u2260',
+ 'nearhk;': '\u2924',
+ 'neArr;': '\u21d7',
+ 'nearr;': '\u2197',
+ 'nearrow;': '\u2197',
+ 'nedot;': '\u2250\u0338',
+ 'NegativeMediumSpace;': '\u200b',
+ 'NegativeThickSpace;': '\u200b',
+ 'NegativeThinSpace;': '\u200b',
+ 'NegativeVeryThinSpace;': '\u200b',
+ 'nequiv;': '\u2262',
+ 'nesear;': '\u2928',
+ 'nesim;': '\u2242\u0338',
+ 'NestedGreaterGreater;': '\u226b',
+ 'NestedLessLess;': '\u226a',
+ 'NewLine;': '\n',
+ 'nexist;': '\u2204',
+ 'nexists;': '\u2204',
+ 'Nfr;': '\U0001d511',
+ 'nfr;': '\U0001d52b',
+ 'ngE;': '\u2267\u0338',
+ 'nge;': '\u2271',
+ 'ngeq;': '\u2271',
+ 'ngeqq;': '\u2267\u0338',
+ 'ngeqslant;': '\u2a7e\u0338',
+ 'nges;': '\u2a7e\u0338',
+ 'nGg;': '\u22d9\u0338',
+ 'ngsim;': '\u2275',
+ 'nGt;': '\u226b\u20d2',
+ 'ngt;': '\u226f',
+ 'ngtr;': '\u226f',
+ 'nGtv;': '\u226b\u0338',
+ 'nhArr;': '\u21ce',
+ 'nharr;': '\u21ae',
+ 'nhpar;': '\u2af2',
+ 'ni;': '\u220b',
+ 'nis;': '\u22fc',
+ 'nisd;': '\u22fa',
+ 'niv;': '\u220b',
+ 'NJcy;': '\u040a',
+ 'njcy;': '\u045a',
+ 'nlArr;': '\u21cd',
+ 'nlarr;': '\u219a',
+ 'nldr;': '\u2025',
+ 'nlE;': '\u2266\u0338',
+ 'nle;': '\u2270',
+ 'nLeftarrow;': '\u21cd',
+ 'nleftarrow;': '\u219a',
+ 'nLeftrightarrow;': '\u21ce',
+ 'nleftrightarrow;': '\u21ae',
+ 'nleq;': '\u2270',
+ 'nleqq;': '\u2266\u0338',
+ 'nleqslant;': '\u2a7d\u0338',
+ 'nles;': '\u2a7d\u0338',
+ 'nless;': '\u226e',
+ 'nLl;': '\u22d8\u0338',
+ 'nlsim;': '\u2274',
+ 'nLt;': '\u226a\u20d2',
+ 'nlt;': '\u226e',
+ 'nltri;': '\u22ea',
+ 'nltrie;': '\u22ec',
+ 'nLtv;': '\u226a\u0338',
+ 'nmid;': '\u2224',
+ 'NoBreak;': '\u2060',
+ 'NonBreakingSpace;': '\xa0',
+ 'Nopf;': '\u2115',
+ 'nopf;': '\U0001d55f',
+ 'not': '\xac',
+ 'Not;': '\u2aec',
+ 'not;': '\xac',
+ 'NotCongruent;': '\u2262',
+ 'NotCupCap;': '\u226d',
+ 'NotDoubleVerticalBar;': '\u2226',
+ 'NotElement;': '\u2209',
+ 'NotEqual;': '\u2260',
+ 'NotEqualTilde;': '\u2242\u0338',
+ 'NotExists;': '\u2204',
+ 'NotGreater;': '\u226f',
+ 'NotGreaterEqual;': '\u2271',
+ 'NotGreaterFullEqual;': '\u2267\u0338',
+ 'NotGreaterGreater;': '\u226b\u0338',
+ 'NotGreaterLess;': '\u2279',
+ 'NotGreaterSlantEqual;': '\u2a7e\u0338',
+ 'NotGreaterTilde;': '\u2275',
+ 'NotHumpDownHump;': '\u224e\u0338',
+ 'NotHumpEqual;': '\u224f\u0338',
+ 'notin;': '\u2209',
+ 'notindot;': '\u22f5\u0338',
+ 'notinE;': '\u22f9\u0338',
+ 'notinva;': '\u2209',
+ 'notinvb;': '\u22f7',
+ 'notinvc;': '\u22f6',
+ 'NotLeftTriangle;': '\u22ea',
+ 'NotLeftTriangleBar;': '\u29cf\u0338',
+ 'NotLeftTriangleEqual;': '\u22ec',
+ 'NotLess;': '\u226e',
+ 'NotLessEqual;': '\u2270',
+ 'NotLessGreater;': '\u2278',
+ 'NotLessLess;': '\u226a\u0338',
+ 'NotLessSlantEqual;': '\u2a7d\u0338',
+ 'NotLessTilde;': '\u2274',
+ 'NotNestedGreaterGreater;': '\u2aa2\u0338',
+ 'NotNestedLessLess;': '\u2aa1\u0338',
+ 'notni;': '\u220c',
+ 'notniva;': '\u220c',
+ 'notnivb;': '\u22fe',
+ 'notnivc;': '\u22fd',
+ 'NotPrecedes;': '\u2280',
+ 'NotPrecedesEqual;': '\u2aaf\u0338',
+ 'NotPrecedesSlantEqual;': '\u22e0',
+ 'NotReverseElement;': '\u220c',
+ 'NotRightTriangle;': '\u22eb',
+ 'NotRightTriangleBar;': '\u29d0\u0338',
+ 'NotRightTriangleEqual;': '\u22ed',
+ 'NotSquareSubset;': '\u228f\u0338',
+ 'NotSquareSubsetEqual;': '\u22e2',
+ 'NotSquareSuperset;': '\u2290\u0338',
+ 'NotSquareSupersetEqual;': '\u22e3',
+ 'NotSubset;': '\u2282\u20d2',
+ 'NotSubsetEqual;': '\u2288',
+ 'NotSucceeds;': '\u2281',
+ 'NotSucceedsEqual;': '\u2ab0\u0338',
+ 'NotSucceedsSlantEqual;': '\u22e1',
+ 'NotSucceedsTilde;': '\u227f\u0338',
+ 'NotSuperset;': '\u2283\u20d2',
+ 'NotSupersetEqual;': '\u2289',
+ 'NotTilde;': '\u2241',
+ 'NotTildeEqual;': '\u2244',
+ 'NotTildeFullEqual;': '\u2247',
+ 'NotTildeTilde;': '\u2249',
+ 'NotVerticalBar;': '\u2224',
+ 'npar;': '\u2226',
+ 'nparallel;': '\u2226',
+ 'nparsl;': '\u2afd\u20e5',
+ 'npart;': '\u2202\u0338',
+ 'npolint;': '\u2a14',
+ 'npr;': '\u2280',
+ 'nprcue;': '\u22e0',
+ 'npre;': '\u2aaf\u0338',
+ 'nprec;': '\u2280',
+ 'npreceq;': '\u2aaf\u0338',
+ 'nrArr;': '\u21cf',
+ 'nrarr;': '\u219b',
+ 'nrarrc;': '\u2933\u0338',
+ 'nrarrw;': '\u219d\u0338',
+ 'nRightarrow;': '\u21cf',
+ 'nrightarrow;': '\u219b',
+ 'nrtri;': '\u22eb',
+ 'nrtrie;': '\u22ed',
+ 'nsc;': '\u2281',
+ 'nsccue;': '\u22e1',
+ 'nsce;': '\u2ab0\u0338',
+ 'Nscr;': '\U0001d4a9',
+ 'nscr;': '\U0001d4c3',
+ 'nshortmid;': '\u2224',
+ 'nshortparallel;': '\u2226',
+ 'nsim;': '\u2241',
+ 'nsime;': '\u2244',
+ 'nsimeq;': '\u2244',
+ 'nsmid;': '\u2224',
+ 'nspar;': '\u2226',
+ 'nsqsube;': '\u22e2',
+ 'nsqsupe;': '\u22e3',
+ 'nsub;': '\u2284',
+ 'nsubE;': '\u2ac5\u0338',
+ 'nsube;': '\u2288',
+ 'nsubset;': '\u2282\u20d2',
+ 'nsubseteq;': '\u2288',
+ 'nsubseteqq;': '\u2ac5\u0338',
+ 'nsucc;': '\u2281',
+ 'nsucceq;': '\u2ab0\u0338',
+ 'nsup;': '\u2285',
+ 'nsupE;': '\u2ac6\u0338',
+ 'nsupe;': '\u2289',
+ 'nsupset;': '\u2283\u20d2',
+ 'nsupseteq;': '\u2289',
+ 'nsupseteqq;': '\u2ac6\u0338',
+ 'ntgl;': '\u2279',
+ 'Ntilde': '\xd1',
+ 'ntilde': '\xf1',
+ 'Ntilde;': '\xd1',
+ 'ntilde;': '\xf1',
+ 'ntlg;': '\u2278',
+ 'ntriangleleft;': '\u22ea',
+ 'ntrianglelefteq;': '\u22ec',
+ 'ntriangleright;': '\u22eb',
+ 'ntrianglerighteq;': '\u22ed',
+ 'Nu;': '\u039d',
+ 'nu;': '\u03bd',
+ 'num;': '#',
+ 'numero;': '\u2116',
+ 'numsp;': '\u2007',
+ 'nvap;': '\u224d\u20d2',
+ 'nVDash;': '\u22af',
+ 'nVdash;': '\u22ae',
+ 'nvDash;': '\u22ad',
+ 'nvdash;': '\u22ac',
+ 'nvge;': '\u2265\u20d2',
+ 'nvgt;': '>\u20d2',
+ 'nvHarr;': '\u2904',
+ 'nvinfin;': '\u29de',
+ 'nvlArr;': '\u2902',
+ 'nvle;': '\u2264\u20d2',
+ 'nvlt;': '<\u20d2',
+ 'nvltrie;': '\u22b4\u20d2',
+ 'nvrArr;': '\u2903',
+ 'nvrtrie;': '\u22b5\u20d2',
+ 'nvsim;': '\u223c\u20d2',
+ 'nwarhk;': '\u2923',
+ 'nwArr;': '\u21d6',
+ 'nwarr;': '\u2196',
+ 'nwarrow;': '\u2196',
+ 'nwnear;': '\u2927',
+ 'Oacute': '\xd3',
+ 'oacute': '\xf3',
+ 'Oacute;': '\xd3',
+ 'oacute;': '\xf3',
+ 'oast;': '\u229b',
+ 'ocir;': '\u229a',
+ 'Ocirc': '\xd4',
+ 'ocirc': '\xf4',
+ 'Ocirc;': '\xd4',
+ 'ocirc;': '\xf4',
+ 'Ocy;': '\u041e',
+ 'ocy;': '\u043e',
+ 'odash;': '\u229d',
+ 'Odblac;': '\u0150',
+ 'odblac;': '\u0151',
+ 'odiv;': '\u2a38',
+ 'odot;': '\u2299',
+ 'odsold;': '\u29bc',
+ 'OElig;': '\u0152',
+ 'oelig;': '\u0153',
+ 'ofcir;': '\u29bf',
+ 'Ofr;': '\U0001d512',
+ 'ofr;': '\U0001d52c',
+ 'ogon;': '\u02db',
+ 'Ograve': '\xd2',
+ 'ograve': '\xf2',
+ 'Ograve;': '\xd2',
+ 'ograve;': '\xf2',
+ 'ogt;': '\u29c1',
+ 'ohbar;': '\u29b5',
+ 'ohm;': '\u03a9',
+ 'oint;': '\u222e',
+ 'olarr;': '\u21ba',
+ 'olcir;': '\u29be',
+ 'olcross;': '\u29bb',
+ 'oline;': '\u203e',
+ 'olt;': '\u29c0',
+ 'Omacr;': '\u014c',
+ 'omacr;': '\u014d',
+ 'Omega;': '\u03a9',
+ 'omega;': '\u03c9',
+ 'Omicron;': '\u039f',
+ 'omicron;': '\u03bf',
+ 'omid;': '\u29b6',
+ 'ominus;': '\u2296',
+ 'Oopf;': '\U0001d546',
+ 'oopf;': '\U0001d560',
+ 'opar;': '\u29b7',
+ 'OpenCurlyDoubleQuote;': '\u201c',
+ 'OpenCurlyQuote;': '\u2018',
+ 'operp;': '\u29b9',
+ 'oplus;': '\u2295',
+ 'Or;': '\u2a54',
+ 'or;': '\u2228',
+ 'orarr;': '\u21bb',
+ 'ord;': '\u2a5d',
+ 'order;': '\u2134',
+ 'orderof;': '\u2134',
+ 'ordf': '\xaa',
+ 'ordf;': '\xaa',
+ 'ordm': '\xba',
+ 'ordm;': '\xba',
+ 'origof;': '\u22b6',
+ 'oror;': '\u2a56',
+ 'orslope;': '\u2a57',
+ 'orv;': '\u2a5b',
+ 'oS;': '\u24c8',
+ 'Oscr;': '\U0001d4aa',
+ 'oscr;': '\u2134',
+ 'Oslash': '\xd8',
+ 'oslash': '\xf8',
+ 'Oslash;': '\xd8',
+ 'oslash;': '\xf8',
+ 'osol;': '\u2298',
+ 'Otilde': '\xd5',
+ 'otilde': '\xf5',
+ 'Otilde;': '\xd5',
+ 'otilde;': '\xf5',
+ 'Otimes;': '\u2a37',
+ 'otimes;': '\u2297',
+ 'otimesas;': '\u2a36',
+ 'Ouml': '\xd6',
+ 'ouml': '\xf6',
+ 'Ouml;': '\xd6',
+ 'ouml;': '\xf6',
+ 'ovbar;': '\u233d',
+ 'OverBar;': '\u203e',
+ 'OverBrace;': '\u23de',
+ 'OverBracket;': '\u23b4',
+ 'OverParenthesis;': '\u23dc',
+ 'par;': '\u2225',
+ 'para': '\xb6',
+ 'para;': '\xb6',
+ 'parallel;': '\u2225',
+ 'parsim;': '\u2af3',
+ 'parsl;': '\u2afd',
+ 'part;': '\u2202',
+ 'PartialD;': '\u2202',
+ 'Pcy;': '\u041f',
+ 'pcy;': '\u043f',
+ 'percnt;': '%',
+ 'period;': '.',
+ 'permil;': '\u2030',
+ 'perp;': '\u22a5',
+ 'pertenk;': '\u2031',
+ 'Pfr;': '\U0001d513',
+ 'pfr;': '\U0001d52d',
+ 'Phi;': '\u03a6',
+ 'phi;': '\u03c6',
+ 'phiv;': '\u03d5',
+ 'phmmat;': '\u2133',
+ 'phone;': '\u260e',
+ 'Pi;': '\u03a0',
+ 'pi;': '\u03c0',
+ 'pitchfork;': '\u22d4',
+ 'piv;': '\u03d6',
+ 'planck;': '\u210f',
+ 'planckh;': '\u210e',
+ 'plankv;': '\u210f',
+ 'plus;': '+',
+ 'plusacir;': '\u2a23',
+ 'plusb;': '\u229e',
+ 'pluscir;': '\u2a22',
+ 'plusdo;': '\u2214',
+ 'plusdu;': '\u2a25',
+ 'pluse;': '\u2a72',
+ 'PlusMinus;': '\xb1',
+ 'plusmn': '\xb1',
+ 'plusmn;': '\xb1',
+ 'plussim;': '\u2a26',
+ 'plustwo;': '\u2a27',
+ 'pm;': '\xb1',
+ 'Poincareplane;': '\u210c',
+ 'pointint;': '\u2a15',
+ 'Popf;': '\u2119',
+ 'popf;': '\U0001d561',
+ 'pound': '\xa3',
+ 'pound;': '\xa3',
+ 'Pr;': '\u2abb',
+ 'pr;': '\u227a',
+ 'prap;': '\u2ab7',
+ 'prcue;': '\u227c',
+ 'prE;': '\u2ab3',
+ 'pre;': '\u2aaf',
+ 'prec;': '\u227a',
+ 'precapprox;': '\u2ab7',
+ 'preccurlyeq;': '\u227c',
+ 'Precedes;': '\u227a',
+ 'PrecedesEqual;': '\u2aaf',
+ 'PrecedesSlantEqual;': '\u227c',
+ 'PrecedesTilde;': '\u227e',
+ 'preceq;': '\u2aaf',
+ 'precnapprox;': '\u2ab9',
+ 'precneqq;': '\u2ab5',
+ 'precnsim;': '\u22e8',
+ 'precsim;': '\u227e',
+ 'Prime;': '\u2033',
+ 'prime;': '\u2032',
+ 'primes;': '\u2119',
+ 'prnap;': '\u2ab9',
+ 'prnE;': '\u2ab5',
+ 'prnsim;': '\u22e8',
+ 'prod;': '\u220f',
+ 'Product;': '\u220f',
+ 'profalar;': '\u232e',
+ 'profline;': '\u2312',
+ 'profsurf;': '\u2313',
+ 'prop;': '\u221d',
+ 'Proportion;': '\u2237',
+ 'Proportional;': '\u221d',
+ 'propto;': '\u221d',
+ 'prsim;': '\u227e',
+ 'prurel;': '\u22b0',
+ 'Pscr;': '\U0001d4ab',
+ 'pscr;': '\U0001d4c5',
+ 'Psi;': '\u03a8',
+ 'psi;': '\u03c8',
+ 'puncsp;': '\u2008',
+ 'Qfr;': '\U0001d514',
+ 'qfr;': '\U0001d52e',
+ 'qint;': '\u2a0c',
+ 'Qopf;': '\u211a',
+ 'qopf;': '\U0001d562',
+ 'qprime;': '\u2057',
+ 'Qscr;': '\U0001d4ac',
+ 'qscr;': '\U0001d4c6',
+ 'quaternions;': '\u210d',
+ 'quatint;': '\u2a16',
+ 'quest;': '?',
+ 'questeq;': '\u225f',
+ 'QUOT': '"',
+ 'quot': '"',
+ 'QUOT;': '"',
+ 'quot;': '"',
+ 'rAarr;': '\u21db',
+ 'race;': '\u223d\u0331',
+ 'Racute;': '\u0154',
+ 'racute;': '\u0155',
+ 'radic;': '\u221a',
+ 'raemptyv;': '\u29b3',
+ 'Rang;': '\u27eb',
+ 'rang;': '\u27e9',
+ 'rangd;': '\u2992',
+ 'range;': '\u29a5',
+ 'rangle;': '\u27e9',
+ 'raquo': '\xbb',
+ 'raquo;': '\xbb',
+ 'Rarr;': '\u21a0',
+ 'rArr;': '\u21d2',
+ 'rarr;': '\u2192',
+ 'rarrap;': '\u2975',
+ 'rarrb;': '\u21e5',
+ 'rarrbfs;': '\u2920',
+ 'rarrc;': '\u2933',
+ 'rarrfs;': '\u291e',
+ 'rarrhk;': '\u21aa',
+ 'rarrlp;': '\u21ac',
+ 'rarrpl;': '\u2945',
+ 'rarrsim;': '\u2974',
+ 'Rarrtl;': '\u2916',
+ 'rarrtl;': '\u21a3',
+ 'rarrw;': '\u219d',
+ 'rAtail;': '\u291c',
+ 'ratail;': '\u291a',
+ 'ratio;': '\u2236',
+ 'rationals;': '\u211a',
+ 'RBarr;': '\u2910',
+ 'rBarr;': '\u290f',
+ 'rbarr;': '\u290d',
+ 'rbbrk;': '\u2773',
+ 'rbrace;': '}',
+ 'rbrack;': ']',
+ 'rbrke;': '\u298c',
+ 'rbrksld;': '\u298e',
+ 'rbrkslu;': '\u2990',
+ 'Rcaron;': '\u0158',
+ 'rcaron;': '\u0159',
+ 'Rcedil;': '\u0156',
+ 'rcedil;': '\u0157',
+ 'rceil;': '\u2309',
+ 'rcub;': '}',
+ 'Rcy;': '\u0420',
+ 'rcy;': '\u0440',
+ 'rdca;': '\u2937',
+ 'rdldhar;': '\u2969',
+ 'rdquo;': '\u201d',
+ 'rdquor;': '\u201d',
+ 'rdsh;': '\u21b3',
+ 'Re;': '\u211c',
+ 'real;': '\u211c',
+ 'realine;': '\u211b',
+ 'realpart;': '\u211c',
+ 'reals;': '\u211d',
+ 'rect;': '\u25ad',
+ 'REG': '\xae',
+ 'reg': '\xae',
+ 'REG;': '\xae',
+ 'reg;': '\xae',
+ 'ReverseElement;': '\u220b',
+ 'ReverseEquilibrium;': '\u21cb',
+ 'ReverseUpEquilibrium;': '\u296f',
+ 'rfisht;': '\u297d',
+ 'rfloor;': '\u230b',
+ 'Rfr;': '\u211c',
+ 'rfr;': '\U0001d52f',
+ 'rHar;': '\u2964',
+ 'rhard;': '\u21c1',
+ 'rharu;': '\u21c0',
+ 'rharul;': '\u296c',
+ 'Rho;': '\u03a1',
+ 'rho;': '\u03c1',
+ 'rhov;': '\u03f1',
+ 'RightAngleBracket;': '\u27e9',
+ 'RightArrow;': '\u2192',
+ 'Rightarrow;': '\u21d2',
+ 'rightarrow;': '\u2192',
+ 'RightArrowBar;': '\u21e5',
+ 'RightArrowLeftArrow;': '\u21c4',
+ 'rightarrowtail;': '\u21a3',
+ 'RightCeiling;': '\u2309',
+ 'RightDoubleBracket;': '\u27e7',
+ 'RightDownTeeVector;': '\u295d',
+ 'RightDownVector;': '\u21c2',
+ 'RightDownVectorBar;': '\u2955',
+ 'RightFloor;': '\u230b',
+ 'rightharpoondown;': '\u21c1',
+ 'rightharpoonup;': '\u21c0',
+ 'rightleftarrows;': '\u21c4',
+ 'rightleftharpoons;': '\u21cc',
+ 'rightrightarrows;': '\u21c9',
+ 'rightsquigarrow;': '\u219d',
+ 'RightTee;': '\u22a2',
+ 'RightTeeArrow;': '\u21a6',
+ 'RightTeeVector;': '\u295b',
+ 'rightthreetimes;': '\u22cc',
+ 'RightTriangle;': '\u22b3',
+ 'RightTriangleBar;': '\u29d0',
+ 'RightTriangleEqual;': '\u22b5',
+ 'RightUpDownVector;': '\u294f',
+ 'RightUpTeeVector;': '\u295c',
+ 'RightUpVector;': '\u21be',
+ 'RightUpVectorBar;': '\u2954',
+ 'RightVector;': '\u21c0',
+ 'RightVectorBar;': '\u2953',
+ 'ring;': '\u02da',
+ 'risingdotseq;': '\u2253',
+ 'rlarr;': '\u21c4',
+ 'rlhar;': '\u21cc',
+ 'rlm;': '\u200f',
+ 'rmoust;': '\u23b1',
+ 'rmoustache;': '\u23b1',
+ 'rnmid;': '\u2aee',
+ 'roang;': '\u27ed',
+ 'roarr;': '\u21fe',
+ 'robrk;': '\u27e7',
+ 'ropar;': '\u2986',
+ 'Ropf;': '\u211d',
+ 'ropf;': '\U0001d563',
+ 'roplus;': '\u2a2e',
+ 'rotimes;': '\u2a35',
+ 'RoundImplies;': '\u2970',
+ 'rpar;': ')',
+ 'rpargt;': '\u2994',
+ 'rppolint;': '\u2a12',
+ 'rrarr;': '\u21c9',
+ 'Rrightarrow;': '\u21db',
+ 'rsaquo;': '\u203a',
+ 'Rscr;': '\u211b',
+ 'rscr;': '\U0001d4c7',
+ 'Rsh;': '\u21b1',
+ 'rsh;': '\u21b1',
+ 'rsqb;': ']',
+ 'rsquo;': '\u2019',
+ 'rsquor;': '\u2019',
+ 'rthree;': '\u22cc',
+ 'rtimes;': '\u22ca',
+ 'rtri;': '\u25b9',
+ 'rtrie;': '\u22b5',
+ 'rtrif;': '\u25b8',
+ 'rtriltri;': '\u29ce',
+ 'RuleDelayed;': '\u29f4',
+ 'ruluhar;': '\u2968',
+ 'rx;': '\u211e',
+ 'Sacute;': '\u015a',
+ 'sacute;': '\u015b',
+ 'sbquo;': '\u201a',
+ 'Sc;': '\u2abc',
+ 'sc;': '\u227b',
+ 'scap;': '\u2ab8',
+ 'Scaron;': '\u0160',
+ 'scaron;': '\u0161',
+ 'sccue;': '\u227d',
+ 'scE;': '\u2ab4',
+ 'sce;': '\u2ab0',
+ 'Scedil;': '\u015e',
+ 'scedil;': '\u015f',
+ 'Scirc;': '\u015c',
+ 'scirc;': '\u015d',
+ 'scnap;': '\u2aba',
+ 'scnE;': '\u2ab6',
+ 'scnsim;': '\u22e9',
+ 'scpolint;': '\u2a13',
+ 'scsim;': '\u227f',
+ 'Scy;': '\u0421',
+ 'scy;': '\u0441',
+ 'sdot;': '\u22c5',
+ 'sdotb;': '\u22a1',
+ 'sdote;': '\u2a66',
+ 'searhk;': '\u2925',
+ 'seArr;': '\u21d8',
+ 'searr;': '\u2198',
+ 'searrow;': '\u2198',
+ 'sect': '\xa7',
+ 'sect;': '\xa7',
+ 'semi;': ';',
+ 'seswar;': '\u2929',
+ 'setminus;': '\u2216',
+ 'setmn;': '\u2216',
+ 'sext;': '\u2736',
+ 'Sfr;': '\U0001d516',
+ 'sfr;': '\U0001d530',
+ 'sfrown;': '\u2322',
+ 'sharp;': '\u266f',
+ 'SHCHcy;': '\u0429',
+ 'shchcy;': '\u0449',
+ 'SHcy;': '\u0428',
+ 'shcy;': '\u0448',
+ 'ShortDownArrow;': '\u2193',
+ 'ShortLeftArrow;': '\u2190',
+ 'shortmid;': '\u2223',
+ 'shortparallel;': '\u2225',
+ 'ShortRightArrow;': '\u2192',
+ 'ShortUpArrow;': '\u2191',
+ 'shy': '\xad',
+ 'shy;': '\xad',
+ 'Sigma;': '\u03a3',
+ 'sigma;': '\u03c3',
+ 'sigmaf;': '\u03c2',
+ 'sigmav;': '\u03c2',
+ 'sim;': '\u223c',
+ 'simdot;': '\u2a6a',
+ 'sime;': '\u2243',
+ 'simeq;': '\u2243',
+ 'simg;': '\u2a9e',
+ 'simgE;': '\u2aa0',
+ 'siml;': '\u2a9d',
+ 'simlE;': '\u2a9f',
+ 'simne;': '\u2246',
+ 'simplus;': '\u2a24',
+ 'simrarr;': '\u2972',
+ 'slarr;': '\u2190',
+ 'SmallCircle;': '\u2218',
+ 'smallsetminus;': '\u2216',
+ 'smashp;': '\u2a33',
+ 'smeparsl;': '\u29e4',
+ 'smid;': '\u2223',
+ 'smile;': '\u2323',
+ 'smt;': '\u2aaa',
+ 'smte;': '\u2aac',
+ 'smtes;': '\u2aac\ufe00',
+ 'SOFTcy;': '\u042c',
+ 'softcy;': '\u044c',
+ 'sol;': '/',
+ 'solb;': '\u29c4',
+ 'solbar;': '\u233f',
+ 'Sopf;': '\U0001d54a',
+ 'sopf;': '\U0001d564',
+ 'spades;': '\u2660',
+ 'spadesuit;': '\u2660',
+ 'spar;': '\u2225',
+ 'sqcap;': '\u2293',
+ 'sqcaps;': '\u2293\ufe00',
+ 'sqcup;': '\u2294',
+ 'sqcups;': '\u2294\ufe00',
+ 'Sqrt;': '\u221a',
+ 'sqsub;': '\u228f',
+ 'sqsube;': '\u2291',
+ 'sqsubset;': '\u228f',
+ 'sqsubseteq;': '\u2291',
+ 'sqsup;': '\u2290',
+ 'sqsupe;': '\u2292',
+ 'sqsupset;': '\u2290',
+ 'sqsupseteq;': '\u2292',
+ 'squ;': '\u25a1',
+ 'Square;': '\u25a1',
+ 'square;': '\u25a1',
+ 'SquareIntersection;': '\u2293',
+ 'SquareSubset;': '\u228f',
+ 'SquareSubsetEqual;': '\u2291',
+ 'SquareSuperset;': '\u2290',
+ 'SquareSupersetEqual;': '\u2292',
+ 'SquareUnion;': '\u2294',
+ 'squarf;': '\u25aa',
+ 'squf;': '\u25aa',
+ 'srarr;': '\u2192',
+ 'Sscr;': '\U0001d4ae',
+ 'sscr;': '\U0001d4c8',
+ 'ssetmn;': '\u2216',
+ 'ssmile;': '\u2323',
+ 'sstarf;': '\u22c6',
+ 'Star;': '\u22c6',
+ 'star;': '\u2606',
+ 'starf;': '\u2605',
+ 'straightepsilon;': '\u03f5',
+ 'straightphi;': '\u03d5',
+ 'strns;': '\xaf',
+ 'Sub;': '\u22d0',
+ 'sub;': '\u2282',
+ 'subdot;': '\u2abd',
+ 'subE;': '\u2ac5',
+ 'sube;': '\u2286',
+ 'subedot;': '\u2ac3',
+ 'submult;': '\u2ac1',
+ 'subnE;': '\u2acb',
+ 'subne;': '\u228a',
+ 'subplus;': '\u2abf',
+ 'subrarr;': '\u2979',
+ 'Subset;': '\u22d0',
+ 'subset;': '\u2282',
+ 'subseteq;': '\u2286',
+ 'subseteqq;': '\u2ac5',
+ 'SubsetEqual;': '\u2286',
+ 'subsetneq;': '\u228a',
+ 'subsetneqq;': '\u2acb',
+ 'subsim;': '\u2ac7',
+ 'subsub;': '\u2ad5',
+ 'subsup;': '\u2ad3',
+ 'succ;': '\u227b',
+ 'succapprox;': '\u2ab8',
+ 'succcurlyeq;': '\u227d',
+ 'Succeeds;': '\u227b',
+ 'SucceedsEqual;': '\u2ab0',
+ 'SucceedsSlantEqual;': '\u227d',
+ 'SucceedsTilde;': '\u227f',
+ 'succeq;': '\u2ab0',
+ 'succnapprox;': '\u2aba',
+ 'succneqq;': '\u2ab6',
+ 'succnsim;': '\u22e9',
+ 'succsim;': '\u227f',
+ 'SuchThat;': '\u220b',
+ 'Sum;': '\u2211',
+ 'sum;': '\u2211',
+ 'sung;': '\u266a',
+ 'sup1': '\xb9',
+ 'sup1;': '\xb9',
+ 'sup2': '\xb2',
+ 'sup2;': '\xb2',
+ 'sup3': '\xb3',
+ 'sup3;': '\xb3',
+ 'Sup;': '\u22d1',
+ 'sup;': '\u2283',
+ 'supdot;': '\u2abe',
+ 'supdsub;': '\u2ad8',
+ 'supE;': '\u2ac6',
+ 'supe;': '\u2287',
+ 'supedot;': '\u2ac4',
+ 'Superset;': '\u2283',
+ 'SupersetEqual;': '\u2287',
+ 'suphsol;': '\u27c9',
+ 'suphsub;': '\u2ad7',
+ 'suplarr;': '\u297b',
+ 'supmult;': '\u2ac2',
+ 'supnE;': '\u2acc',
+ 'supne;': '\u228b',
+ 'supplus;': '\u2ac0',
+ 'Supset;': '\u22d1',
+ 'supset;': '\u2283',
+ 'supseteq;': '\u2287',
+ 'supseteqq;': '\u2ac6',
+ 'supsetneq;': '\u228b',
+ 'supsetneqq;': '\u2acc',
+ 'supsim;': '\u2ac8',
+ 'supsub;': '\u2ad4',
+ 'supsup;': '\u2ad6',
+ 'swarhk;': '\u2926',
+ 'swArr;': '\u21d9',
+ 'swarr;': '\u2199',
+ 'swarrow;': '\u2199',
+ 'swnwar;': '\u292a',
+ 'szlig': '\xdf',
+ 'szlig;': '\xdf',
+ 'Tab;': '\t',
+ 'target;': '\u2316',
+ 'Tau;': '\u03a4',
+ 'tau;': '\u03c4',
+ 'tbrk;': '\u23b4',
+ 'Tcaron;': '\u0164',
+ 'tcaron;': '\u0165',
+ 'Tcedil;': '\u0162',
+ 'tcedil;': '\u0163',
+ 'Tcy;': '\u0422',
+ 'tcy;': '\u0442',
+ 'tdot;': '\u20db',
+ 'telrec;': '\u2315',
+ 'Tfr;': '\U0001d517',
+ 'tfr;': '\U0001d531',
+ 'there4;': '\u2234',
+ 'Therefore;': '\u2234',
+ 'therefore;': '\u2234',
+ 'Theta;': '\u0398',
+ 'theta;': '\u03b8',
+ 'thetasym;': '\u03d1',
+ 'thetav;': '\u03d1',
+ 'thickapprox;': '\u2248',
+ 'thicksim;': '\u223c',
+ 'ThickSpace;': '\u205f\u200a',
+ 'thinsp;': '\u2009',
+ 'ThinSpace;': '\u2009',
+ 'thkap;': '\u2248',
+ 'thksim;': '\u223c',
+ 'THORN': '\xde',
+ 'thorn': '\xfe',
+ 'THORN;': '\xde',
+ 'thorn;': '\xfe',
+ 'Tilde;': '\u223c',
+ 'tilde;': '\u02dc',
+ 'TildeEqual;': '\u2243',
+ 'TildeFullEqual;': '\u2245',
+ 'TildeTilde;': '\u2248',
+ 'times': '\xd7',
+ 'times;': '\xd7',
+ 'timesb;': '\u22a0',
+ 'timesbar;': '\u2a31',
+ 'timesd;': '\u2a30',
+ 'tint;': '\u222d',
+ 'toea;': '\u2928',
+ 'top;': '\u22a4',
+ 'topbot;': '\u2336',
+ 'topcir;': '\u2af1',
+ 'Topf;': '\U0001d54b',
+ 'topf;': '\U0001d565',
+ 'topfork;': '\u2ada',
+ 'tosa;': '\u2929',
+ 'tprime;': '\u2034',
+ 'TRADE;': '\u2122',
+ 'trade;': '\u2122',
+ 'triangle;': '\u25b5',
+ 'triangledown;': '\u25bf',
+ 'triangleleft;': '\u25c3',
+ 'trianglelefteq;': '\u22b4',
+ 'triangleq;': '\u225c',
+ 'triangleright;': '\u25b9',
+ 'trianglerighteq;': '\u22b5',
+ 'tridot;': '\u25ec',
+ 'trie;': '\u225c',
+ 'triminus;': '\u2a3a',
+ 'TripleDot;': '\u20db',
+ 'triplus;': '\u2a39',
+ 'trisb;': '\u29cd',
+ 'tritime;': '\u2a3b',
+ 'trpezium;': '\u23e2',
+ 'Tscr;': '\U0001d4af',
+ 'tscr;': '\U0001d4c9',
+ 'TScy;': '\u0426',
+ 'tscy;': '\u0446',
+ 'TSHcy;': '\u040b',
+ 'tshcy;': '\u045b',
+ 'Tstrok;': '\u0166',
+ 'tstrok;': '\u0167',
+ 'twixt;': '\u226c',
+ 'twoheadleftarrow;': '\u219e',
+ 'twoheadrightarrow;': '\u21a0',
+ 'Uacute': '\xda',
+ 'uacute': '\xfa',
+ 'Uacute;': '\xda',
+ 'uacute;': '\xfa',
+ 'Uarr;': '\u219f',
+ 'uArr;': '\u21d1',
+ 'uarr;': '\u2191',
+ 'Uarrocir;': '\u2949',
+ 'Ubrcy;': '\u040e',
+ 'ubrcy;': '\u045e',
+ 'Ubreve;': '\u016c',
+ 'ubreve;': '\u016d',
+ 'Ucirc': '\xdb',
+ 'ucirc': '\xfb',
+ 'Ucirc;': '\xdb',
+ 'ucirc;': '\xfb',
+ 'Ucy;': '\u0423',
+ 'ucy;': '\u0443',
+ 'udarr;': '\u21c5',
+ 'Udblac;': '\u0170',
+ 'udblac;': '\u0171',
+ 'udhar;': '\u296e',
+ 'ufisht;': '\u297e',
+ 'Ufr;': '\U0001d518',
+ 'ufr;': '\U0001d532',
+ 'Ugrave': '\xd9',
+ 'ugrave': '\xf9',
+ 'Ugrave;': '\xd9',
+ 'ugrave;': '\xf9',
+ 'uHar;': '\u2963',
+ 'uharl;': '\u21bf',
+ 'uharr;': '\u21be',
+ 'uhblk;': '\u2580',
+ 'ulcorn;': '\u231c',
+ 'ulcorner;': '\u231c',
+ 'ulcrop;': '\u230f',
+ 'ultri;': '\u25f8',
+ 'Umacr;': '\u016a',
+ 'umacr;': '\u016b',
+ 'uml': '\xa8',
+ 'uml;': '\xa8',
+ 'UnderBar;': '_',
+ 'UnderBrace;': '\u23df',
+ 'UnderBracket;': '\u23b5',
+ 'UnderParenthesis;': '\u23dd',
+ 'Union;': '\u22c3',
+ 'UnionPlus;': '\u228e',
+ 'Uogon;': '\u0172',
+ 'uogon;': '\u0173',
+ 'Uopf;': '\U0001d54c',
+ 'uopf;': '\U0001d566',
+ 'UpArrow;': '\u2191',
+ 'Uparrow;': '\u21d1',
+ 'uparrow;': '\u2191',
+ 'UpArrowBar;': '\u2912',
+ 'UpArrowDownArrow;': '\u21c5',
+ 'UpDownArrow;': '\u2195',
+ 'Updownarrow;': '\u21d5',
+ 'updownarrow;': '\u2195',
+ 'UpEquilibrium;': '\u296e',
+ 'upharpoonleft;': '\u21bf',
+ 'upharpoonright;': '\u21be',
+ 'uplus;': '\u228e',
+ 'UpperLeftArrow;': '\u2196',
+ 'UpperRightArrow;': '\u2197',
+ 'Upsi;': '\u03d2',
+ 'upsi;': '\u03c5',
+ 'upsih;': '\u03d2',
+ 'Upsilon;': '\u03a5',
+ 'upsilon;': '\u03c5',
+ 'UpTee;': '\u22a5',
+ 'UpTeeArrow;': '\u21a5',
+ 'upuparrows;': '\u21c8',
+ 'urcorn;': '\u231d',
+ 'urcorner;': '\u231d',
+ 'urcrop;': '\u230e',
+ 'Uring;': '\u016e',
+ 'uring;': '\u016f',
+ 'urtri;': '\u25f9',
+ 'Uscr;': '\U0001d4b0',
+ 'uscr;': '\U0001d4ca',
+ 'utdot;': '\u22f0',
+ 'Utilde;': '\u0168',
+ 'utilde;': '\u0169',
+ 'utri;': '\u25b5',
+ 'utrif;': '\u25b4',
+ 'uuarr;': '\u21c8',
+ 'Uuml': '\xdc',
+ 'uuml': '\xfc',
+ 'Uuml;': '\xdc',
+ 'uuml;': '\xfc',
+ 'uwangle;': '\u29a7',
+ 'vangrt;': '\u299c',
+ 'varepsilon;': '\u03f5',
+ 'varkappa;': '\u03f0',
+ 'varnothing;': '\u2205',
+ 'varphi;': '\u03d5',
+ 'varpi;': '\u03d6',
+ 'varpropto;': '\u221d',
+ 'vArr;': '\u21d5',
+ 'varr;': '\u2195',
+ 'varrho;': '\u03f1',
+ 'varsigma;': '\u03c2',
+ 'varsubsetneq;': '\u228a\ufe00',
+ 'varsubsetneqq;': '\u2acb\ufe00',
+ 'varsupsetneq;': '\u228b\ufe00',
+ 'varsupsetneqq;': '\u2acc\ufe00',
+ 'vartheta;': '\u03d1',
+ 'vartriangleleft;': '\u22b2',
+ 'vartriangleright;': '\u22b3',
+ 'Vbar;': '\u2aeb',
+ 'vBar;': '\u2ae8',
+ 'vBarv;': '\u2ae9',
+ 'Vcy;': '\u0412',
+ 'vcy;': '\u0432',
+ 'VDash;': '\u22ab',
+ 'Vdash;': '\u22a9',
+ 'vDash;': '\u22a8',
+ 'vdash;': '\u22a2',
+ 'Vdashl;': '\u2ae6',
+ 'Vee;': '\u22c1',
+ 'vee;': '\u2228',
+ 'veebar;': '\u22bb',
+ 'veeeq;': '\u225a',
+ 'vellip;': '\u22ee',
+ 'Verbar;': '\u2016',
+ 'verbar;': '|',
+ 'Vert;': '\u2016',
+ 'vert;': '|',
+ 'VerticalBar;': '\u2223',
+ 'VerticalLine;': '|',
+ 'VerticalSeparator;': '\u2758',
+ 'VerticalTilde;': '\u2240',
+ 'VeryThinSpace;': '\u200a',
+ 'Vfr;': '\U0001d519',
+ 'vfr;': '\U0001d533',
+ 'vltri;': '\u22b2',
+ 'vnsub;': '\u2282\u20d2',
+ 'vnsup;': '\u2283\u20d2',
+ 'Vopf;': '\U0001d54d',
+ 'vopf;': '\U0001d567',
+ 'vprop;': '\u221d',
+ 'vrtri;': '\u22b3',
+ 'Vscr;': '\U0001d4b1',
+ 'vscr;': '\U0001d4cb',
+ 'vsubnE;': '\u2acb\ufe00',
+ 'vsubne;': '\u228a\ufe00',
+ 'vsupnE;': '\u2acc\ufe00',
+ 'vsupne;': '\u228b\ufe00',
+ 'Vvdash;': '\u22aa',
+ 'vzigzag;': '\u299a',
+ 'Wcirc;': '\u0174',
+ 'wcirc;': '\u0175',
+ 'wedbar;': '\u2a5f',
+ 'Wedge;': '\u22c0',
+ 'wedge;': '\u2227',
+ 'wedgeq;': '\u2259',
+ 'weierp;': '\u2118',
+ 'Wfr;': '\U0001d51a',
+ 'wfr;': '\U0001d534',
+ 'Wopf;': '\U0001d54e',
+ 'wopf;': '\U0001d568',
+ 'wp;': '\u2118',
+ 'wr;': '\u2240',
+ 'wreath;': '\u2240',
+ 'Wscr;': '\U0001d4b2',
+ 'wscr;': '\U0001d4cc',
+ 'xcap;': '\u22c2',
+ 'xcirc;': '\u25ef',
+ 'xcup;': '\u22c3',
+ 'xdtri;': '\u25bd',
+ 'Xfr;': '\U0001d51b',
+ 'xfr;': '\U0001d535',
+ 'xhArr;': '\u27fa',
+ 'xharr;': '\u27f7',
+ 'Xi;': '\u039e',
+ 'xi;': '\u03be',
+ 'xlArr;': '\u27f8',
+ 'xlarr;': '\u27f5',
+ 'xmap;': '\u27fc',
+ 'xnis;': '\u22fb',
+ 'xodot;': '\u2a00',
+ 'Xopf;': '\U0001d54f',
+ 'xopf;': '\U0001d569',
+ 'xoplus;': '\u2a01',
+ 'xotime;': '\u2a02',
+ 'xrArr;': '\u27f9',
+ 'xrarr;': '\u27f6',
+ 'Xscr;': '\U0001d4b3',
+ 'xscr;': '\U0001d4cd',
+ 'xsqcup;': '\u2a06',
+ 'xuplus;': '\u2a04',
+ 'xutri;': '\u25b3',
+ 'xvee;': '\u22c1',
+ 'xwedge;': '\u22c0',
+ 'Yacute': '\xdd',
+ 'yacute': '\xfd',
+ 'Yacute;': '\xdd',
+ 'yacute;': '\xfd',
+ 'YAcy;': '\u042f',
+ 'yacy;': '\u044f',
+ 'Ycirc;': '\u0176',
+ 'ycirc;': '\u0177',
+ 'Ycy;': '\u042b',
+ 'ycy;': '\u044b',
+ 'yen': '\xa5',
+ 'yen;': '\xa5',
+ 'Yfr;': '\U0001d51c',
+ 'yfr;': '\U0001d536',
+ 'YIcy;': '\u0407',
+ 'yicy;': '\u0457',
+ 'Yopf;': '\U0001d550',
+ 'yopf;': '\U0001d56a',
+ 'Yscr;': '\U0001d4b4',
+ 'yscr;': '\U0001d4ce',
+ 'YUcy;': '\u042e',
+ 'yucy;': '\u044e',
+ 'yuml': '\xff',
+ 'Yuml;': '\u0178',
+ 'yuml;': '\xff',
+ 'Zacute;': '\u0179',
+ 'zacute;': '\u017a',
+ 'Zcaron;': '\u017d',
+ 'zcaron;': '\u017e',
+ 'Zcy;': '\u0417',
+ 'zcy;': '\u0437',
+ 'Zdot;': '\u017b',
+ 'zdot;': '\u017c',
+ 'zeetrf;': '\u2128',
+ 'ZeroWidthSpace;': '\u200b',
+ 'Zeta;': '\u0396',
+ 'zeta;': '\u03b6',
+ 'Zfr;': '\u2128',
+ 'zfr;': '\U0001d537',
+ 'ZHcy;': '\u0416',
+ 'zhcy;': '\u0436',
+ 'zigrarr;': '\u21dd',
+ 'Zopf;': '\u2124',
+ 'zopf;': '\U0001d56b',
+ 'Zscr;': '\U0001d4b5',
+ 'zscr;': '\U0001d4cf',
+ 'zwj;': '\u200d',
+ 'zwnj;': '\u200c',
+ }
+
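+# A few illustrative lookups (values taken from the table above): HTML5
+# defines many entities both with and without the trailing semicolon, and
+# some expand to more than one character.
+#
+# >>> compat_html_entities_html5['amp;']
+# '&'
+# >>> compat_html_entities_html5['eacute']
+# '\xe9'
+# >>> compat_html_entities_html5['fjlig;']
+# 'fj'
+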
+try:
+ import http.client as compat_http_client
+except ImportError: # Python 2
+ import httplib as compat_http_client
+
+try:
+ from urllib.error import HTTPError as compat_HTTPError
+except ImportError: # Python 2
+ from urllib2 import HTTPError as compat_HTTPError
+
+try:
+ from urllib.request import urlretrieve as compat_urlretrieve
+except ImportError: # Python 2
+ from urllib import urlretrieve as compat_urlretrieve
+
+try:
+ from html.parser import HTMLParser as compat_HTMLParser
+except ImportError: # Python 2
+ from HTMLParser import HTMLParser as compat_HTMLParser
+
+try:  # Python 2
+    from HTMLParser import HTMLParseError as compat_HTMLParseError
+except ImportError:  # Python 3
+    try:  # Python <3.5
+        from html.parser import HTMLParseError as compat_HTMLParseError
+    except ImportError:  # Python >=3.5
+
+        # HTMLParseError was deprecated in Python 3.3 and removed in
+        # Python 3.5. Introduce a dummy exception for Python 3.5+ so that
+        # exception handling stays compatible and uniform across versions.
+        class compat_HTMLParseError(Exception):
+            pass
+
+try:
+ from subprocess import DEVNULL
+ compat_subprocess_get_DEVNULL = lambda: DEVNULL
+except ImportError:
+ compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
+
+try:
+ import http.server as compat_http_server
+except ImportError:
+ import BaseHTTPServer as compat_http_server
+
+try:
+ compat_str = unicode # Python 2
+except NameError:
+ compat_str = str
+
+try:
+ from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
+ from urllib.parse import unquote as compat_urllib_parse_unquote
+ from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus
+except ImportError: # Python 2
+ _asciire = (compat_urllib_parse._asciire if hasattr(compat_urllib_parse, '_asciire')
+ else re.compile(r'([\x00-\x7f]+)'))
+
+ # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus
+ # implementations from cpython 3.4.3's stdlib. Python 2's version
+ # is apparently broken (see https://github.com/ytdl-org/youtube-dl/pull/6244)
+
+ def compat_urllib_parse_unquote_to_bytes(string):
+ """unquote_to_bytes('abc%20def') -> b'abc def'."""
+ # Note: strings are encoded as UTF-8. This is only an issue if it contains
+ # unescaped non-ASCII characters, which URIs should not.
+ if not string:
+ # Is it a string-like object?
+ string.split
+ return b''
+ if isinstance(string, compat_str):
+ string = string.encode('utf-8')
+ bits = string.split(b'%')
+ if len(bits) == 1:
+ return string
+ res = [bits[0]]
+ append = res.append
+ for item in bits[1:]:
+ try:
+ append(compat_urllib_parse._hextochr[item[:2]])
+ append(item[2:])
+ except KeyError:
+ append(b'%')
+ append(item)
+ return b''.join(res)
+
+ def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
+ """Replace %xx escapes by their single-character equivalent. The optional
+ encoding and errors parameters specify how to decode percent-encoded
+ sequences into Unicode characters, as accepted by the bytes.decode()
+ method.
+ By default, percent-encoded sequences are decoded with UTF-8, and invalid
+ sequences are replaced by a placeholder character.
+
+ unquote('abc%20def') -> 'abc def'.
+ """
+ if '%' not in string:
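+ # Is it a string-like object? (deliberately raises AttributeError for
+ # non-strings, mirroring the check in unquote_to_bytes above)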
+ string.split
+ return string
+ if encoding is None:
+ encoding = 'utf-8'
+ if errors is None:
+ errors = 'replace'
+ bits = _asciire.split(string)
+ res = [bits[0]]
+ append = res.append
+ for i in range(1, len(bits), 2):
+ append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors))
+ append(bits[i + 1])
+ return ''.join(res)
+
+ def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'):
+ """Like unquote(), but also replace plus signs by spaces, as required for
+ unquoting HTML form values.
+
+ unquote_plus('%7e/abc+def') -> '~/abc def'
+ """
+ string = string.replace('+', ' ')
+ return compat_urllib_parse_unquote(string, encoding, errors)
+
+try:
+ from urllib.parse import urlencode as compat_urllib_parse_urlencode
+except ImportError: # Python 2
+ # Python 2 will choke in urlencode on a mixture of byte and unicode strings.
+ # Possible solutions are to either port it from Python 3 with all
+ # its dependencies or manually ensure the input query contains only byte strings.
+ # We stick with the latter, thus recursively encoding the whole query.
+ def compat_urllib_parse_urlencode(query, doseq=0, encoding='utf-8'):
+ def encode_elem(e):
+ if isinstance(e, dict):
+ e = encode_dict(e)
+ elif isinstance(e, (list, tuple,)):
+ list_e = encode_list(e)
+ e = tuple(list_e) if isinstance(e, tuple) else list_e
+ elif isinstance(e, compat_str):
+ e = e.encode(encoding)
+ return e
+
+ def encode_dict(d):
+ return dict((encode_elem(k), encode_elem(v)) for k, v in d.items())
+
+ def encode_list(l):
+ return [encode_elem(e) for e in l]
+
+ return compat_urllib_parse.urlencode(encode_elem(query), doseq=doseq)
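+
+ # Illustrative example (values assumed): a unicode value is first
+ # recursively encoded to UTF-8 bytes, so on the Python 2 branch
+ # compat_urllib_parse_urlencode({'a': u'\u00e4'}) -> 'a=%C3%A4'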
+
+try:
+ from urllib.request import DataHandler as compat_urllib_request_DataHandler
+except ImportError: # Python < 3.4
+ # Ported from CPython 98774:1733b3bd46db, Lib/urllib/request.py
+ class compat_urllib_request_DataHandler(compat_urllib_request.BaseHandler):
+ def data_open(self, req):
+ # data URLs as specified in RFC 2397.
+ #
+ # ignores POSTed data
+ #
+ # syntax:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ url = req.get_full_url()
+
+ scheme, data = url.split(':', 1)
+ mediatype, data = data.split(',', 1)
+
+ # even base64 encoded data URLs might be quoted so unquote in any case:
+ data = compat_urllib_parse_unquote_to_bytes(data)
+ if mediatype.endswith(';base64'):
+ data = binascii.a2b_base64(data)
+ mediatype = mediatype[:-7]
+
+ if not mediatype:
+ mediatype = 'text/plain;charset=US-ASCII'
+
+ headers = email.message_from_string(
+ 'Content-type: %s\nContent-length: %d\n' % (mediatype, len(data)))
+
+ return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
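+
+ # Illustrative example: opening 'data:text/plain;base64,SGVsbG8=' through
+ # this handler yields a response whose read() returns b'Hello' with
+ # Content-type text/plain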
+
+try:
+ compat_basestring = basestring # Python 2
+except NameError:
+ compat_basestring = str
+
+try:
+ compat_chr = unichr # Python 2
+except NameError:
+ compat_chr = chr
+
+try:
+ from xml.etree.ElementTree import ParseError as compat_xml_parse_error
+except ImportError: # Python 2.6
+ from xml.parsers.expat import ExpatError as compat_xml_parse_error
+
+
+etree = xml.etree.ElementTree
+
+
+class _TreeBuilder(etree.TreeBuilder):
+ def doctype(self, name, pubid, system):
+ pass
+
+
+try:
+ # xml.etree.ElementTree.Element is a method in Python <=2.6 and
+ # the following will crash with:
+ # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types
+ isinstance(None, xml.etree.ElementTree.Element)
+ from xml.etree.ElementTree import Element as compat_etree_Element
+except TypeError: # Python <=2.6
+ from xml.etree.ElementTree import _ElementInterface as compat_etree_Element
+
+if sys.version_info[0] >= 3:
+ def compat_etree_fromstring(text):
+ return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))
+else:
+ # Python 2.x tries to encode unicode strings with ASCII (see the
+ # XMLParser._fixtext method)
+ try:
+ _etree_iter = etree.Element.iter
+ except AttributeError: # Python <=2.6
+ def _etree_iter(root):
+ for el in root.findall('*'):
+ yield el
+ for sub in _etree_iter(el):
+ yield sub
+
+ # on 2.6 XML doesn't have a parser argument, function copied from CPython
+ # 2.7 source
+ def _XML(text, parser=None):
+ if not parser:
+ parser = etree.XMLParser(target=_TreeBuilder())
+ parser.feed(text)
+ return parser.close()
+
+ def _element_factory(*args, **kwargs):
+ el = etree.Element(*args, **kwargs)
+ for k, v in el.items():
+ if isinstance(v, bytes):
+ el.set(k, v.decode('utf-8'))
+ return el
+
+ def compat_etree_fromstring(text):
+ doc = _XML(text, parser=etree.XMLParser(target=_TreeBuilder(element_factory=_element_factory)))
+ for el in _etree_iter(doc):
+ if el.text is not None and isinstance(el.text, bytes):
+ el.text = el.text.decode('utf-8')
+ return doc
+
+if hasattr(etree, 'register_namespace'):
+ compat_etree_register_namespace = etree.register_namespace
+else:
+ def compat_etree_register_namespace(prefix, uri):
+ """Register a namespace prefix.
+ The registry is global, and any existing mapping for either the
+ given prefix or the namespace URI will be removed.
+ *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
+ attributes in this namespace will be serialized with prefix if possible.
+ ValueError is raised if prefix is reserved or is invalid.
+ """
+ if re.match(r"ns\d+$", prefix):
+ raise ValueError("Prefix format reserved for internal use")
+ for k, v in list(etree._namespace_map.items()):
+ if k == uri or v == prefix:
+ del etree._namespace_map[k]
+ etree._namespace_map[uri] = prefix
+
+if sys.version_info < (2, 7):
+ # Here comes the crazy part: in 2.6, if the xpath is a unicode string,
+ # .//node does not match if a node is a direct child of . !
+ def compat_xpath(xpath):
+ if isinstance(xpath, compat_str):
+ xpath = xpath.encode('ascii')
+ return xpath
+else:
+ compat_xpath = lambda xpath: xpath
+
+try:
+ from urllib.parse import parse_qs as compat_parse_qs
+except ImportError: # Python 2
+ # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
+ # Python 2's version is apparently totally broken
+
+ def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ qs, _coerce_result = qs, compat_str
+ pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
+ r = []
+ for name_value in pairs:
+ if not name_value and not strict_parsing:
+ continue
+ nv = name_value.split('=', 1)
+ if len(nv) != 2:
+ if strict_parsing:
+ raise ValueError('bad query field: %r' % (name_value,))
+ # Handle case of a control-name with no equal sign
+ if keep_blank_values:
+ nv.append('')
+ else:
+ continue
+ if len(nv[1]) or keep_blank_values:
+ name = nv[0].replace('+', ' ')
+ name = compat_urllib_parse_unquote(
+ name, encoding=encoding, errors=errors)
+ name = _coerce_result(name)
+ value = nv[1].replace('+', ' ')
+ value = compat_urllib_parse_unquote(
+ value, encoding=encoding, errors=errors)
+ value = _coerce_result(value)
+ r.append((name, value))
+ return r
+
+ def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
+ encoding='utf-8', errors='replace'):
+ parsed_result = {}
+ pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
+ encoding=encoding, errors=errors)
+ for name, value in pairs:
+ if name in parsed_result:
+ parsed_result[name].append(value)
+ else:
+ parsed_result[name] = [value]
+ return parsed_result
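+
+ # Illustrative example:
+ # compat_parse_qs('a=1&a=2&b=') -> {'a': ['1', '2']}
+ # (the blank 'b' is only kept when keep_blank_values=True)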
+
+
+compat_os_name = os._name if os.name == 'java' else os.name
+
+
+if compat_os_name == 'nt':
+ def compat_shlex_quote(s):
+ return s if re.match(r'^[-_\w./]+$', s) else '"%s"' % s.replace('"', '\\"')
+else:
+ try:
+ from shlex import quote as compat_shlex_quote
+ except ImportError: # Python < 3.3
+ def compat_shlex_quote(s):
+ if re.match(r'^[-_\w./]+$', s):
+ return s
+ else:
+ return "'" + s.replace("'", "'\"'\"'") + "'"
+
+
+try:
+ args = shlex.split('中文')
+ assert (isinstance(args, list)
+ and isinstance(args[0], compat_str)
+ and args[0] == '中文')
+ compat_shlex_split = shlex.split
+except (AssertionError, UnicodeEncodeError):
+ # Working around shlex issue with unicode strings on some python 2
+ # versions (see http://bugs.python.org/issue1548891)
+ def compat_shlex_split(s, comments=False, posix=True):
+ if isinstance(s, compat_str):
+ s = s.encode('utf-8')
+ return list(map(lambda s: s.decode('utf-8'), shlex.split(s, comments, posix)))
+
+
+def compat_ord(c):
+ if type(c) is int:
+ return c
+ else:
+ return ord(c)
+
+
+if sys.version_info >= (3, 0):
+ compat_getenv = os.getenv
+ compat_expanduser = os.path.expanduser
+
+ def compat_setenv(key, value, env=os.environ):
+ env[key] = value
+else:
+ # Environment variables should be decoded with the filesystem encoding;
+ # otherwise it will fail if any non-ASCII characters are present (see #3854 #3217 #2918)
+
+ def compat_getenv(key, default=None):
+ from .utils import get_filesystem_encoding
+ env = os.getenv(key, default)
+ if env:
+ env = env.decode(get_filesystem_encoding())
+ return env
+
+ def compat_setenv(key, value, env=os.environ):
+ def encode(v):
+ from .utils import get_filesystem_encoding
+ return v.encode(get_filesystem_encoding()) if isinstance(v, compat_str) else v
+ env[encode(key)] = encode(value)
+
+ # HACK: The default implementations of os.path.expanduser from cpython do not decode
+ # environment variables with filesystem encoding. We will work around this by
+ # providing adjusted implementations.
+ # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
+ # for different platforms with correct environment variables decoding.
+
+ if compat_os_name == 'posix':
+ def compat_expanduser(path):
+ """Expand ~ and ~user constructions. If user or $HOME is unknown,
+ do nothing."""
+ if not path.startswith('~'):
+ return path
+ i = path.find('/', 1)
+ if i < 0:
+ i = len(path)
+ if i == 1:
+ if 'HOME' not in os.environ:
+ import pwd
+ userhome = pwd.getpwuid(os.getuid()).pw_dir
+ else:
+ userhome = compat_getenv('HOME')
+ else:
+ import pwd
+ try:
+ pwent = pwd.getpwnam(path[1:i])
+ except KeyError:
+ return path
+ userhome = pwent.pw_dir
+ userhome = userhome.rstrip('/')
+ return (userhome + path[i:]) or '/'
+ elif compat_os_name in ('nt', 'ce'):
+ def compat_expanduser(path):
+ """Expand ~ and ~user constructs.
+
+ If user or $HOME is unknown, do nothing."""
+ if path[:1] != '~':
+ return path
+ i, n = 1, len(path)
+ while i < n and path[i] not in '/\\':
+ i = i + 1
+
+ if 'HOME' in os.environ:
+ userhome = compat_getenv('HOME')
+ elif 'USERPROFILE' in os.environ:
+ userhome = compat_getenv('USERPROFILE')
+ elif 'HOMEPATH' not in os.environ:
+ return path
+ else:
+ try:
+ drive = compat_getenv('HOMEDRIVE')
+ except KeyError:
+ drive = ''
+ userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
+
+ if i != 1: # ~user
+ userhome = os.path.join(os.path.dirname(userhome), path[1:i])
+
+ return userhome + path[i:]
+ else:
+ compat_expanduser = os.path.expanduser
+
+
+if compat_os_name == 'nt' and sys.version_info < (3, 8):
+ # os.path.realpath on Windows does not follow symbolic links
+ # prior to Python 3.8 (see https://bugs.python.org/issue9949)
+ def compat_realpath(path):
+ while os.path.islink(path):
+ path = os.path.abspath(os.readlink(path))
+ return path
+else:
+ compat_realpath = os.path.realpath
+
+
+if sys.version_info < (3, 0):
+ def compat_print(s):
+ from .utils import preferredencoding
+ print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
+else:
+ def compat_print(s):
+ assert isinstance(s, compat_str)
+ print(s)
+
+
+if sys.version_info < (3, 0) and sys.platform == 'win32':
+ def compat_getpass(prompt, *args, **kwargs):
+ if isinstance(prompt, compat_str):
+ from .utils import preferredencoding
+ prompt = prompt.encode(preferredencoding())
+ return getpass.getpass(prompt, *args, **kwargs)
+else:
+ compat_getpass = getpass.getpass
+
+try:
+ compat_input = raw_input
+except NameError: # Python 3
+ compat_input = input
+
+# Python < 2.6.5 requires kwargs to be bytes
+try:
+ def _testfunc(x):
+ pass
+ _testfunc(**{'x': 0})
+except TypeError:
+ def compat_kwargs(kwargs):
+ return dict((bytes(k), v) for k, v in kwargs.items())
+else:
+ compat_kwargs = lambda kwargs: kwargs
+
+
+try:
+ compat_numeric_types = (int, float, long, complex)
+except NameError: # Python 3
+ compat_numeric_types = (int, float, complex)
+
+
+try:
+ compat_integer_types = (int, long)
+except NameError: # Python 3
+ compat_integer_types = (int, )
+
+
+if sys.version_info < (2, 7):
+ def compat_socket_create_connection(address, timeout, source_address=None):
+ host, port = address
+ err = None
+ for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket.socket(af, socktype, proto)
+ sock.settimeout(timeout)
+ if source_address:
+ sock.bind(source_address)
+ sock.connect(sa)
+ return sock
+ except socket.error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+ if err is not None:
+ raise err
+ else:
+ raise socket.error('getaddrinfo returns an empty list')
+else:
+ compat_socket_create_connection = socket.create_connection
+
+
+# Fix https://github.com/ytdl-org/youtube-dl/issues/4223
+# See http://bugs.python.org/issue9161 for what is broken
+def workaround_optparse_bug9161():
+ op = optparse.OptionParser()
+ og = optparse.OptionGroup(op, 'foo')
+ try:
+ og.add_option('-t')
+ except TypeError:
+ real_add_option = optparse.OptionGroup.add_option
+
+ def _compat_add_option(self, *args, **kwargs):
+ enc = lambda v: (
+ v.encode('ascii', 'replace') if isinstance(v, compat_str)
+ else v)
+ bargs = [enc(a) for a in args]
+ bkwargs = dict(
+ (k, enc(v)) for k, v in kwargs.items())
+ return real_add_option(self, *bargs, **bkwargs)
+ optparse.OptionGroup.add_option = _compat_add_option
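+
+# The workaround is meant to be applied once at startup, before the first
+# real OptionParser is constructed; illustrative usage:
+# workaround_optparse_bug9161()
+# parser = optparse.OptionParser()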
+
+
+if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
+ compat_get_terminal_size = shutil.get_terminal_size
+else:
+ _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
+
+ def compat_get_terminal_size(fallback=(80, 24)):
+ columns = compat_getenv('COLUMNS')
+ if columns:
+ columns = int(columns)
+ else:
+ columns = None
+ lines = compat_getenv('LINES')
+ if lines:
+ lines = int(lines)
+ else:
+ lines = None
+
+ if columns is None or lines is None or columns <= 0 or lines <= 0:
+ try:
+ sp = subprocess.Popen(
+ ['stty', 'size'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = sp.communicate()
+ _lines, _columns = map(int, out.split())
+ except Exception:
+ _columns, _lines = _terminal_size(*fallback)
+
+ if columns is None or columns <= 0:
+ columns = _columns
+ if lines is None or lines <= 0:
+ lines = _lines
+ return _terminal_size(columns, lines)
+
+try:
+ itertools.count(start=0, step=1)
+ compat_itertools_count = itertools.count
+except TypeError: # Python 2.6
+ def compat_itertools_count(start=0, step=1):
+ n = start
+ while True:
+ yield n
+ n += step
+
+if sys.version_info >= (3, 0):
+ from tokenize import tokenize as compat_tokenize_tokenize
+else:
+ from tokenize import generate_tokens as compat_tokenize_tokenize
+
+
+try:
+ struct.pack('!I', 0)
+except TypeError:
+ # In Python 2.6 and 2.7.x < 2.7.7, struct requires a bytes argument
+ # See https://bugs.python.org/issue19099
+ def compat_struct_pack(spec, *args):
+ if isinstance(spec, compat_str):
+ spec = spec.encode('ascii')
+ return struct.pack(spec, *args)
+
+ def compat_struct_unpack(spec, *args):
+ if isinstance(spec, compat_str):
+ spec = spec.encode('ascii')
+ return struct.unpack(spec, *args)
+
+ class compat_Struct(struct.Struct):
+ def __init__(self, fmt):
+ if isinstance(fmt, compat_str):
+ fmt = fmt.encode('ascii')
+ super(compat_Struct, self).__init__(fmt)
+else:
+ compat_struct_pack = struct.pack
+ compat_struct_unpack = struct.unpack
+ if platform.python_implementation() == 'IronPython' and sys.version_info < (2, 7, 8):
+ class compat_Struct(struct.Struct):
+ def unpack(self, string):
+ if not isinstance(string, buffer): # noqa: F821
+ string = buffer(string) # noqa: F821
+ return super(compat_Struct, self).unpack(string)
+ else:
+ compat_Struct = struct.Struct
+
+
+try:
+ from future_builtins import zip as compat_zip
+except ImportError: # not 2.6+ or is 3.x
+ try:
+ from itertools import izip as compat_zip # < 2.5 or 3.x
+ except ImportError:
+ compat_zip = zip
+
+
+if sys.version_info < (3, 3):
+ def compat_b64decode(s, *args, **kwargs):
+ if isinstance(s, compat_str):
+ s = s.encode('ascii')
+ return base64.b64decode(s, *args, **kwargs)
+else:
+ compat_b64decode = base64.b64decode
+
+
+if platform.python_implementation() == 'PyPy' and sys.pypy_version_info < (5, 4, 0):
+ # PyPy2 prior to version 5.4.0 expects byte strings as Windows function
+ # names, see the original PyPy issue [1] and the youtube-dl one [2].
+ # 1. https://bitbucket.org/pypy/pypy/issues/2360/windows-ctypescdll-typeerror-function-name
+ # 2. https://github.com/ytdl-org/youtube-dl/pull/4392
+ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ real = ctypes.WINFUNCTYPE(*args, **kwargs)
+
+ def resf(tpl, *args, **kwargs):
+ funcname, dll = tpl
+ return real((str(funcname), dll), *args, **kwargs)
+
+ return resf
+else:
+ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
+ return ctypes.WINFUNCTYPE(*args, **kwargs)
+
+
+__all__ = [
+ 'compat_HTMLParseError',
+ 'compat_HTMLParser',
+ 'compat_HTTPError',
+ 'compat_Struct',
+ 'compat_b64decode',
+ 'compat_basestring',
+ 'compat_chr',
+ 'compat_cookiejar',
+ 'compat_cookiejar_Cookie',
+ 'compat_cookies',
+ 'compat_ctypes_WINFUNCTYPE',
+ 'compat_etree_Element',
+ 'compat_etree_fromstring',
+ 'compat_etree_register_namespace',
+ 'compat_expanduser',
+ 'compat_get_terminal_size',
+ 'compat_getenv',
+ 'compat_getpass',
+ 'compat_html_entities',
+ 'compat_html_entities_html5',
+ 'compat_http_client',
+ 'compat_http_server',
+ 'compat_input',
+ 'compat_integer_types',
+ 'compat_itertools_count',
+ 'compat_kwargs',
+ 'compat_numeric_types',
+ 'compat_ord',
+ 'compat_os_name',
+ 'compat_parse_qs',
+ 'compat_print',
+ 'compat_realpath',
+ 'compat_setenv',
+ 'compat_shlex_quote',
+ 'compat_shlex_split',
+ 'compat_socket_create_connection',
+ 'compat_str',
+ 'compat_struct_pack',
+ 'compat_struct_unpack',
+ 'compat_subprocess_get_DEVNULL',
+ 'compat_tokenize_tokenize',
+ 'compat_urllib_error',
+ 'compat_urllib_parse',
+ 'compat_urllib_parse_unquote',
+ 'compat_urllib_parse_unquote_plus',
+ 'compat_urllib_parse_unquote_to_bytes',
+ 'compat_urllib_parse_urlencode',
+ 'compat_urllib_parse_urlparse',
+ 'compat_urllib_request',
+ 'compat_urllib_request_DataHandler',
+ 'compat_urllib_response',
+ 'compat_urlparse',
+ 'compat_urlretrieve',
+ 'compat_xml_parse_error',
+ 'compat_xpath',
+ 'compat_zip',
+ 'workaround_optparse_bug9161',
+]
diff --git a/youtube_dlc/downloader/__init__.py b/youtube_dlc/downloader/__init__.py
new file mode 100644
index 000000000..4ae81f516
--- /dev/null
+++ b/youtube_dlc/downloader/__init__.py
@@ -0,0 +1,63 @@
+from __future__ import unicode_literals
+
+from .common import FileDownloader
+from .f4m import F4mFD
+from .hls import HlsFD
+from .http import HttpFD
+from .rtmp import RtmpFD
+from .dash import DashSegmentsFD
+from .rtsp import RtspFD
+from .ism import IsmFD
+from .youtube_live_chat import YoutubeLiveChatReplayFD
+from .external import (
+ get_external_downloader,
+ FFmpegFD,
+)
+
+from ..utils import (
+ determine_protocol,
+)
+
+PROTOCOL_MAP = {
+ 'rtmp': RtmpFD,
+ 'm3u8_native': HlsFD,
+ 'm3u8': FFmpegFD,
+ 'mms': RtspFD,
+ 'rtsp': RtspFD,
+ 'f4m': F4mFD,
+ 'http_dash_segments': DashSegmentsFD,
+ 'ism': IsmFD,
+ 'youtube_live_chat_replay': YoutubeLiveChatReplayFD,
+}
+
+
+def get_suitable_downloader(info_dict, params={}):
+ """Get the downloader class that can handle the info dict."""
+ protocol = determine_protocol(info_dict)
+ info_dict['protocol'] = protocol
+
+ # if (info_dict.get('start_time') or info_dict.get('end_time')) and not info_dict.get('requested_formats') and FFmpegFD.can_download(info_dict):
+ # return FFmpegFD
+
+ external_downloader = params.get('external_downloader')
+ if external_downloader is not None:
+ ed = get_external_downloader(external_downloader)
+ if ed.can_download(info_dict):
+ return ed
+
+ if protocol.startswith('m3u8') and info_dict.get('is_live'):
+ return FFmpegFD
+
+ if protocol == 'm3u8' and params.get('hls_prefer_native') is True:
+ return HlsFD
+
+ if protocol == 'm3u8_native' and params.get('hls_prefer_native') is False:
+ return FFmpegFD
+
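+ # Fall back to the protocol map; e.g. (illustrative) 'f4m' resolves to
+ # F4mFD, while protocols not in the map (such as plain 'https') are
+ # handled by HttpFD.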
+ return PROTOCOL_MAP.get(protocol, HttpFD)
+
+
+__all__ = [
+ 'get_suitable_downloader',
+ 'FileDownloader',
+]
diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py
new file mode 100644
index 000000000..31c286458
--- /dev/null
+++ b/youtube_dlc/downloader/common.py
@@ -0,0 +1,391 @@
+from __future__ import division, unicode_literals
+
+import os
+import re
+import sys
+import time
+import random
+
+from ..compat import compat_os_name
+from ..utils import (
+ decodeArgument,
+ encodeFilename,
+ error_to_compat_str,
+ format_bytes,
+ shell_quote,
+ timeconvert,
+)
+
+
+class FileDownloader(object):
+ """File Downloader class.
+
+ File downloader objects are the ones responsible for downloading the
+ actual video file and writing it to disk.
+
+ File downloaders accept a lot of parameters. In order not to saturate
+ the object constructor with arguments, it receives a dictionary of
+ options instead.
+
+ Available options:
+
+ verbose: Print additional info to stdout.
+ quiet: Do not print messages to stdout.
+ ratelimit: Download speed limit, in bytes/sec.
+ retries: Number of times to retry for HTTP error 5xx
+ buffersize: Size of download buffer in bytes.
+ noresizebuffer: Do not automatically resize the download buffer.
+ continuedl: Try to continue downloads if possible.
+ noprogress: Do not print the progress bar.
+ logtostderr: Log messages to stderr instead of stdout.
+ consoletitle: Display progress in console window's titlebar.
+ nopart: Do not use temporary .part files.
+ updatetime: Use the Last-modified header to set output file timestamps.
+ test: Download only first bytes to test the downloader.
+ min_filesize: Skip files smaller than this size
+ max_filesize: Skip files larger than this size
+ xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+ external_downloader_args: A list of additional command-line arguments for the
+ external downloader.
+ hls_use_mpegts: Use the mpegts container for HLS videos.
+ http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
+ useful for bypassing bandwidth throttling imposed by
+ a webserver (experimental)
+
+ Subclasses of this one must re-define the real_download method.
+ """
+
+ _TEST_FILE_SIZE = 10241
+ params = None
+
+ def __init__(self, ydl, params):
+ """Create a FileDownloader object with the given options."""
+ self.ydl = ydl
+ self._progress_hooks = []
+ self.params = params
+ self.add_progress_hook(self.report_progress)
+
+ @staticmethod
+ def format_seconds(seconds):
+ (mins, secs) = divmod(seconds, 60)
+ (hours, mins) = divmod(mins, 60)
+ if hours > 99:
+ return '--:--:--'
+ if hours == 0:
+ return '%02d:%02d' % (mins, secs)
+ else:
+ return '%02d:%02d:%02d' % (hours, mins, secs)
+
+ @staticmethod
+ def calc_percent(byte_counter, data_len):
+ if data_len is None:
+ return None
+ return float(byte_counter) / float(data_len) * 100.0
+
+ @staticmethod
+ def format_percent(percent):
+ if percent is None:
+ return '---.-%'
+ return '%6s' % ('%3.1f%%' % percent)
+
+ @staticmethod
+ def calc_eta(start, now, total, current):
+ if total is None:
+ return None
+ if now is None:
+ now = time.time()
+ dif = now - start
+ if current == 0 or dif < 0.001: # One millisecond
+ return None
+ rate = float(current) / dif
+ return int((float(total) - float(current)) / rate)
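+
+ # Illustrative example: calc_eta(start=0, now=10, total=100, current=25)
+ # measures a rate of 2.5 units/s and therefore returns an ETA of 30 seconds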
+
+ @staticmethod
+ def format_eta(eta):
+ if eta is None:
+ return '--:--'
+ return FileDownloader.format_seconds(eta)
+
+ @staticmethod
+ def calc_speed(start, now, bytes):
+ dif = now - start
+ if bytes == 0 or dif < 0.001: # One millisecond
+ return None
+ return float(bytes) / dif
+
+ @staticmethod
+ def format_speed(speed):
+ if speed is None:
+ return '%10s' % '---b/s'
+ return '%10s' % ('%s/s' % format_bytes(speed))
+
+ @staticmethod
+ def format_retries(retries):
+ return 'inf' if retries == float('inf') else '%.0f' % retries
+
+ @staticmethod
+ def best_block_size(elapsed_time, bytes):
+ new_min = max(bytes / 2.0, 1.0)
+ new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
+ if elapsed_time < 0.001:
+ return int(new_max)
+ rate = bytes / elapsed_time
+ if rate > new_max:
+ return int(new_max)
+ if rate < new_min:
+ return int(new_min)
+ return int(rate)
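+
+ # Illustrative example: best_block_size(0.5, 1024) measures a rate of
+ # 2048 B/s, which lies within [512, 2048], so 2048 is returned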
+
+ @staticmethod
+ def parse_bytes(bytestr):
+ """Parse a string indicating a byte quantity into an integer."""
+ matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
+ if matchobj is None:
+ return None
+ number = float(matchobj.group(1))
+ multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
+ return int(round(number * multiplier))
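+
+ # Illustrative examples: parse_bytes('500k') == 512000 and
+ # parse_bytes('10.5M') == 11010048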
+
+ def to_screen(self, *args, **kargs):
+ self.ydl.to_screen(*args, **kargs)
+
+ def to_stderr(self, message):
+ self.ydl.to_screen(message)
+
+ def to_console_title(self, message):
+ self.ydl.to_console_title(message)
+
+ def trouble(self, *args, **kargs):
+ self.ydl.trouble(*args, **kargs)
+
+ def report_warning(self, *args, **kargs):
+ self.ydl.report_warning(*args, **kargs)
+
+ def report_error(self, *args, **kargs):
+ self.ydl.report_error(*args, **kargs)
+
+ def slow_down(self, start_time, now, byte_counter):
+ """Sleep if the download speed is over the rate limit."""
+ rate_limit = self.params.get('ratelimit')
+ if rate_limit is None or byte_counter == 0:
+ return
+ if now is None:
+ now = time.time()
+ elapsed = now - start_time
+ if elapsed <= 0.0:
+ return
+ speed = float(byte_counter) / elapsed
+ if speed > rate_limit:
+ sleep_time = float(byte_counter) / rate_limit - elapsed
+ if sleep_time > 0:
+ time.sleep(sleep_time)
+
+ def temp_name(self, filename):
+ """Returns a temporary filename for the given filename."""
+ if self.params.get('nopart', False) or filename == '-' or \
+ (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
+ return filename
+ return filename + '.part'
+
+ def undo_temp_name(self, filename):
+ if filename.endswith('.part'):
+ return filename[:-len('.part')]
+ return filename
+
+ def ytdl_filename(self, filename):
+ return filename + '.ytdl'
+
+ def try_rename(self, old_filename, new_filename):
+ try:
+ if old_filename == new_filename:
+ return
+ os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
+ except (IOError, OSError) as err:
+ self.report_error('unable to rename file: %s' % error_to_compat_str(err))
+
+ def try_utime(self, filename, last_modified_hdr):
+ """Try to set the last-modified time of the given file."""
+ if last_modified_hdr is None:
+ return
+ if not os.path.isfile(encodeFilename(filename)):
+ return
+ timestr = last_modified_hdr
+ if timestr is None:
+ return
+ filetime = timeconvert(timestr)
+ if filetime is None:
+ return filetime
+ # Ignore obviously invalid dates
+ if filetime == 0:
+ return
+ try:
+ os.utime(filename, (time.time(), filetime))
+ except Exception:
+ pass
+ return filetime
+
+ def report_destination(self, filename):
+ """Report destination filename."""
+ self.to_screen('[download] Destination: ' + filename)
+
+ def _report_progress_status(self, msg, is_last_line=False):
+ fullmsg = '[download] ' + msg
+ if self.params.get('progress_with_newline', False):
+ self.to_screen(fullmsg)
+ else:
+ if compat_os_name == 'nt':
+ prev_len = getattr(self, '_report_progress_prev_line_length',
+ 0)
+ if prev_len > len(fullmsg):
+ fullmsg += ' ' * (prev_len - len(fullmsg))
+ self._report_progress_prev_line_length = len(fullmsg)
+ clear_line = '\r'
+ else:
+ clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
+ self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
+ self.to_console_title('youtube-dlc ' + msg)
+
+ def report_progress(self, s):
+ if s['status'] == 'finished':
+ if self.params.get('noprogress', False):
+ self.to_screen('[download] Download completed')
+ else:
+ msg_template = '100%%'
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template += ' of %(_total_bytes_str)s'
+ if s.get('elapsed') is not None:
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template += ' in %(_elapsed_str)s'
+ self._report_progress_status(
+ msg_template % s, is_last_line=True)
+
+ if self.params.get('noprogress'):
+ return
+
+ if s['status'] != 'downloading':
+ return
+
+ if s.get('eta') is not None:
+ s['_eta_str'] = self.format_eta(s['eta'])
+ else:
+ s['_eta_str'] = 'Unknown ETA'
+
+ if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
+ elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
+ else:
+ if s.get('downloaded_bytes') == 0:
+ s['_percent_str'] = self.format_percent(0)
+ else:
+ s['_percent_str'] = 'Unknown %'
+
+ if s.get('speed') is not None:
+ s['_speed_str'] = self.format_speed(s['speed'])
+ else:
+ s['_speed_str'] = 'Unknown speed'
+
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ elif s.get('total_bytes_estimate') is not None:
+ s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
+ msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ else:
+ if s.get('downloaded_bytes') is not None:
+ s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
+ if s.get('elapsed'):
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
+ else:
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
+ else:
+ msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
+
+ self._report_progress_status(msg_template % s)
+
+ def report_resuming_byte(self, resume_len):
+ """Report attempt to resume at given byte."""
+ self.to_screen('[download] Resuming download at byte %s' % resume_len)
+
+ def report_retry(self, err, count, retries):
+ """Report retry in case of HTTP error 5xx"""
+ self.to_screen(
+ '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+ % (error_to_compat_str(err), count, self.format_retries(retries)))
+
+ def report_file_already_downloaded(self, file_name):
+ """Report file has already been fully downloaded."""
+ try:
+ self.to_screen('[download] %s has already been downloaded' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('[download] The file has already been downloaded')
+
+ def report_unable_to_resume(self):
+ """Report it was impossible to resume download."""
+ self.to_screen('[download] Unable to resume')
+
+ def download(self, filename, info_dict):
+ """Download to a filename using the info from info_dict
+ Return True on success and False otherwise
+ """
+
+ nooverwrites_and_exists = (
+ self.params.get('nooverwrites', False)
+ and os.path.exists(encodeFilename(filename))
+ )
+
+ if not hasattr(filename, 'write'):
+ continuedl_and_exists = (
+ self.params.get('continuedl', True)
+ and os.path.isfile(encodeFilename(filename))
+ and not self.params.get('nopart', False)
+ )
+
+ # Check file already present
+ if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
+ self.report_file_already_downloaded(filename)
+ self._hook_progress({
+ 'filename': filename,
+ 'status': 'finished',
+ 'total_bytes': os.path.getsize(encodeFilename(filename)),
+ })
+ return True
+
+ min_sleep_interval = self.params.get('sleep_interval')
+ if min_sleep_interval:
+ max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
+ sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+ self.to_screen(
+ '[download] Sleeping %s seconds...' % (
+ int(sleep_interval) if sleep_interval.is_integer()
+ else '%.2f' % sleep_interval))
+ time.sleep(sleep_interval)
+
+ return self.real_download(filename, info_dict)
+
+ def real_download(self, filename, info_dict):
+ """Real download process. Redefine in subclasses."""
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def _hook_progress(self, status):
+ for ph in self._progress_hooks:
+ ph(status)
+
+ def add_progress_hook(self, ph):
+ # See YoutubeDL.py (search for progress_hooks) for a description of
+ # this interface
+ self._progress_hooks.append(ph)
+
+ def _debug_cmd(self, args, exe=None):
+ if not self.params.get('verbose', False):
+ return
+
+ str_args = [decodeArgument(a) for a in args]
+
+ if exe is None:
+ exe = os.path.basename(str_args[0])
+
+ self.to_screen('[debug] %s command line: %s' % (
+ exe, shell_quote(str_args)))
diff --git a/youtube_dl/downloader/dash.py b/youtube_dlc/downloader/dash.py
index c6d674bc6..c6d674bc6 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dlc/downloader/dash.py
diff --git a/youtube_dl/downloader/external.py b/youtube_dlc/downloader/external.py
index c31f8910a..c31f8910a 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dlc/downloader/external.py
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dlc/downloader/f4m.py
index 8dd3c2eeb..8dd3c2eeb 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dlc/downloader/f4m.py
diff --git a/youtube_dlc/downloader/fragment.py b/youtube_dlc/downloader/fragment.py
new file mode 100644
index 000000000..9339b3a62
--- /dev/null
+++ b/youtube_dlc/downloader/fragment.py
@@ -0,0 +1,269 @@
+from __future__ import division, unicode_literals
+
+import os
+import time
+import json
+
+from .common import FileDownloader
+from .http import HttpFD
+from ..utils import (
+ error_to_compat_str,
+ encodeFilename,
+ sanitize_open,
+ sanitized_Request,
+)
+
+
+class HttpQuietDownloader(HttpFD):
+ def to_screen(self, *args, **kargs):
+ pass
+
+
+class FragmentFD(FileDownloader):
+ """
+ A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
+
+ Available options:
+
+ fragment_retries: Number of times to retry a fragment for HTTP error (DASH
+ and hlsnative only)
+ skip_unavailable_fragments:
+ Skip unavailable fragments (DASH and hlsnative only)
+ keep_fragments: Keep downloaded fragments on disk after downloading is
+ finished
+
+ For each incomplete fragment download youtube-dlc keeps on disk a special
+ bookkeeping file with download state and metadata (in the future such files
+ will be used for any incomplete download handled by youtube-dlc). This file
+ is used to properly handle resuming, check download file consistency and
+ detect potential errors. The file has a .ytdl extension and is a standard
+ JSON file of the following format:
+
+ extractor:
+ Dictionary of extractor related data. TBD.
+
+ downloader:
+ Dictionary of downloader related data. May contain following data:
+ current_fragment:
+ Dictionary with current (being downloaded) fragment data:
+ index: 0-based index of current fragment among all fragments
+ fragment_count:
+ Total count of fragments
+
+ This feature is experimental and the file format may change in the future.
+ """
+
+ def report_retry_fragment(self, err, frag_index, count, retries):
+ self.to_screen(
+ '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...'
+ % (error_to_compat_str(err), frag_index, count, self.format_retries(retries)))
+
+ def report_skip_fragment(self, frag_index):
+ self.to_screen('[download] Skipping fragment %d...' % frag_index)
+
+ def _prepare_url(self, info_dict, url):
+ headers = info_dict.get('http_headers')
+ return sanitized_Request(url, None, headers) if headers else url
+
+ def _prepare_and_start_frag_download(self, ctx):
+ self._prepare_frag_download(ctx)
+ self._start_frag_download(ctx)
+
+ @staticmethod
+ def __do_ytdl_file(ctx):
+ return not ctx['live'] and ctx['tmpfilename'] != '-'
+
+ def _read_ytdl_file(self, ctx):
+ assert 'ytdl_corrupt' not in ctx
+ stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r')
+ try:
+ ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index']
+ except Exception:
+ ctx['ytdl_corrupt'] = True
+ finally:
+ stream.close()
+
+ def _write_ytdl_file(self, ctx):
+ frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w')
+ downloader = {
+ 'current_fragment': {
+ 'index': ctx['fragment_index'],
+ },
+ }
+ if ctx.get('fragment_count') is not None:
+ downloader['fragment_count'] = ctx['fragment_count']
+ frag_index_stream.write(json.dumps({'downloader': downloader}))
+ frag_index_stream.close()
+
+ def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
+ fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
+ success = ctx['dl'].download(fragment_filename, {
+ 'url': frag_url,
+ 'http_headers': headers or info_dict.get('http_headers'),
+ })
+ if not success:
+ return False, None
+ down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
+ ctx['fragment_filename_sanitized'] = frag_sanitized
+ frag_content = down.read()
+ down.close()
+ return True, frag_content
+
+ def _append_fragment(self, ctx, frag_content):
+ try:
+ ctx['dest_stream'].write(frag_content)
+ ctx['dest_stream'].flush()
+ finally:
+ if self.__do_ytdl_file(ctx):
+ self._write_ytdl_file(ctx)
+ if not self.params.get('keep_fragments', False):
+ os.remove(encodeFilename(ctx['fragment_filename_sanitized']))
+ del ctx['fragment_filename_sanitized']
+
+ def _prepare_frag_download(self, ctx):
+ if 'live' not in ctx:
+ ctx['live'] = False
+ if not ctx['live']:
+ total_frags_str = '%d' % ctx['total_frags']
+ ad_frags = ctx.get('ad_frags', 0)
+ if ad_frags:
+ total_frags_str += ' (not including %d ad)' % ad_frags
+ else:
+ total_frags_str = 'unknown (live)'
+ self.to_screen(
+ '[%s] Total fragments: %s' % (self.FD_NAME, total_frags_str))
+ self.report_destination(ctx['filename'])
+ dl = HttpQuietDownloader(
+ self.ydl,
+ {
+ 'continuedl': True,
+ 'quiet': True,
+ 'noprogress': True,
+ 'ratelimit': self.params.get('ratelimit'),
+ 'retries': self.params.get('retries', 0),
+ 'nopart': self.params.get('nopart', False),
+ 'test': self.params.get('test', False),
+ }
+ )
+ tmpfilename = self.temp_name(ctx['filename'])
+ open_mode = 'wb'
+ resume_len = 0
+
+ # Establish possible resume length
+ if os.path.isfile(encodeFilename(tmpfilename)):
+ open_mode = 'ab'
+ resume_len = os.path.getsize(encodeFilename(tmpfilename))
+
+ # Should be initialized before ytdl file check
+ ctx.update({
+ 'tmpfilename': tmpfilename,
+ 'fragment_index': 0,
+ })
+
+ if self.__do_ytdl_file(ctx):
+ if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))):
+ self._read_ytdl_file(ctx)
+ is_corrupt = ctx.get('ytdl_corrupt') is True
+ is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0
+ if is_corrupt or is_inconsistent:
+ message = (
+ '.ytdl file is corrupt' if is_corrupt else
+ 'Inconsistent state of incomplete fragment download')
+ self.report_warning(
+ '%s. Restarting from the beginning...' % message)
+ ctx['fragment_index'] = resume_len = 0
+ if 'ytdl_corrupt' in ctx:
+ del ctx['ytdl_corrupt']
+ self._write_ytdl_file(ctx)
+ else:
+ self._write_ytdl_file(ctx)
+ assert ctx['fragment_index'] == 0
+
+ dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode)
+
+ ctx.update({
+ 'dl': dl,
+ 'dest_stream': dest_stream,
+ 'tmpfilename': tmpfilename,
+ # Total complete fragments downloaded so far in bytes
+ 'complete_frags_downloaded_bytes': resume_len,
+ })
+
+ def _start_frag_download(self, ctx):
+ resume_len = ctx['complete_frags_downloaded_bytes']
+ total_frags = ctx['total_frags']
+ # This dict stores the download progress, it's updated by the progress
+ # hook
+ state = {
+ 'status': 'downloading',
+ 'downloaded_bytes': resume_len,
+ 'fragment_index': ctx['fragment_index'],
+ 'fragment_count': total_frags,
+ 'filename': ctx['filename'],
+ 'tmpfilename': ctx['tmpfilename'],
+ }
+
+ start = time.time()
+ ctx.update({
+ 'started': start,
+ # Amount of fragment's bytes downloaded by the time of the previous
+ # frag progress hook invocation
+ 'prev_frag_downloaded_bytes': 0,
+ })
+
+ def frag_progress_hook(s):
+ if s['status'] not in ('downloading', 'finished'):
+ return
+
+ time_now = time.time()
+ state['elapsed'] = time_now - start
+ frag_total_bytes = s.get('total_bytes') or 0
+ if not ctx['live']:
+ estimated_size = (
+ (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes)
+ / (state['fragment_index'] + 1) * total_frags)
+ state['total_bytes_estimate'] = estimated_size
+
+ if s['status'] == 'finished':
+ state['fragment_index'] += 1
+ ctx['fragment_index'] = state['fragment_index']
+ state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']
+ ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']
+ ctx['prev_frag_downloaded_bytes'] = 0
+ else:
+ frag_downloaded_bytes = s['downloaded_bytes']
+ state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
+ if not ctx['live']:
+ state['eta'] = self.calc_eta(
+ start, time_now, estimated_size - resume_len,
+ state['downloaded_bytes'] - resume_len)
+ state['speed'] = s.get('speed') or ctx.get('speed')
+ ctx['speed'] = state['speed']
+ ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
+ self._hook_progress(state)
+
+ ctx['dl'].add_progress_hook(frag_progress_hook)
+
+ return start
+
+ def _finish_frag_download(self, ctx):
+ ctx['dest_stream'].close()
+ if self.__do_ytdl_file(ctx):
+ ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename']))
+ if os.path.isfile(ytdl_filename):
+ os.remove(ytdl_filename)
+ elapsed = time.time() - ctx['started']
+
+ if ctx['tmpfilename'] == '-':
+ downloaded_bytes = ctx['complete_frags_downloaded_bytes']
+ else:
+ self.try_rename(ctx['tmpfilename'], ctx['filename'])
+ downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
+
+ self._hook_progress({
+ 'downloaded_bytes': downloaded_bytes,
+ 'total_bytes': downloaded_bytes,
+ 'filename': ctx['filename'],
+ 'status': 'finished',
+ 'elapsed': elapsed,
+ })
diff --git a/youtube_dlc/downloader/hls.py b/youtube_dlc/downloader/hls.py
new file mode 100644
index 000000000..84bc34928
--- /dev/null
+++ b/youtube_dlc/downloader/hls.py
@@ -0,0 +1,210 @@
+from __future__ import unicode_literals
+
+import re
+import binascii
+try:
+ from Crypto.Cipher import AES
+ can_decrypt_frag = True
+except ImportError:
+ can_decrypt_frag = False
+
+from .fragment import FragmentFD
+from .external import FFmpegFD
+
+from ..compat import (
+ compat_urllib_error,
+ compat_urlparse,
+ compat_struct_pack,
+)
+from ..utils import (
+ parse_m3u8_attributes,
+ update_url_query,
+)
+
+
+class HlsFD(FragmentFD):
+ """ A limited implementation that does not require ffmpeg """
+
+ FD_NAME = 'hlsnative'
+
+ @staticmethod
+ def can_download(manifest, info_dict):
+ UNSUPPORTED_FEATURES = (
+ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
+ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
+
+ # The live streams heuristic does not always work (e.g. geo-restricted to Germany
+ # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
+ # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
+
+ # This heuristic is also not correct since segments may not be appended either.
+ # Twitch VODs of finished streams have EXT-X-PLAYLIST-TYPE:EVENT even though
+ # no segments will ever be appended to the end of the playlist.
+ # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
+ # # event media playlists [4]
+
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
+ # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+ # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
+ # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
+ )
+ check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
+ is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
+ check_results.append(can_decrypt_frag or not is_aes128_enc)
+ check_results.append(not (is_aes128_enc and r'#EXT-X-BYTERANGE' in manifest))
+ check_results.append(not info_dict.get('is_live'))
+ return all(check_results)
+
+ def real_download(self, filename, info_dict):
+ man_url = info_dict['url']
+ self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
+
+ urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url))
+ man_url = urlh.geturl()
+ s = urlh.read().decode('utf-8', 'ignore')
+
+ if not self.can_download(s, info_dict):
+ if info_dict.get('extra_param_to_segment_url') or info_dict.get('_decryption_key_url'):
+ self.report_error('pycrypto not found. Please install it.')
+ return False
+ self.report_warning(
+ 'hlsnative has detected features it does not support, '
+ 'extraction will be delegated to ffmpeg')
+ fd = FFmpegFD(self.ydl, self.params)
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ return fd.real_download(filename, info_dict)
+
+ def is_ad_fragment_start(s):
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad'))
+
+ def is_ad_fragment_end(s):
+ return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s
+ or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment'))
+
+ media_frags = 0
+ ad_frags = 0
+ ad_frag_next = False
+ for line in s.splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ if line.startswith('#'):
+ if is_ad_fragment_start(line):
+ ad_frag_next = True
+ elif is_ad_fragment_end(line):
+ ad_frag_next = False
+ continue
+ if ad_frag_next:
+ ad_frags += 1
+ continue
+ media_frags += 1
+
+ ctx = {
+ 'filename': filename,
+ 'total_frags': media_frags,
+ 'ad_frags': ad_frags,
+ }
+
+ self._prepare_and_start_frag_download(ctx)
+
+ fragment_retries = self.params.get('fragment_retries', 0)
+ skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
+ test = self.params.get('test', False)
+
+ extra_query = None
+ extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url')
+ if extra_param_to_segment_url:
+ extra_query = compat_urlparse.parse_qs(extra_param_to_segment_url)
+ i = 0
+ media_sequence = 0
+ decrypt_info = {'METHOD': 'NONE'}
+ byte_range = {}
+ frag_index = 0
+ ad_frag_next = False
+ for line in s.splitlines():
+ line = line.strip()
+ if line:
+ if not line.startswith('#'):
+ if ad_frag_next:
+ continue
+ frag_index += 1
+ if frag_index <= ctx['fragment_index']:
+ continue
+ frag_url = (
+ line
+ if re.match(r'^https?://', line)
+ else compat_urlparse.urljoin(man_url, line))
+ if extra_query:
+ frag_url = update_url_query(frag_url, extra_query)
+ count = 0
+ headers = info_dict.get('http_headers', {})
+ if byte_range:
+ # The last byte position in a Range header is inclusive, while
+ # byte_range['end'] is exclusive, hence the -1
+ headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1)
+ while count <= fragment_retries:
+ try:
+ success, frag_content = self._download_fragment(
+ ctx, frag_url, info_dict, headers)
+ if not success:
+ return False
+ break
+ except compat_urllib_error.HTTPError as err:
+ # Unavailable (possibly temporary) fragments may be served.
+ # First we retry, then either skip or abort.
+ # See https://github.com/ytdl-org/youtube-dl/issues/10165 and
+ # https://github.com/ytdl-org/youtube-dl/issues/10448.
+ count += 1
+ if count <= fragment_retries:
+ self.report_retry_fragment(err, frag_index, count, fragment_retries)
+ if count > fragment_retries:
+ if skip_unavailable_fragments:
+ i += 1
+ media_sequence += 1
+ self.report_skip_fragment(frag_index)
+ continue
+ self.report_error(
+ 'giving up after %s fragment retries' % fragment_retries)
+ return False
+ if decrypt_info['METHOD'] == 'AES-128':
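+ # Per the HLS spec, when EXT-X-KEY carries no IV attribute the IV is
+ # the media sequence number as a 128-bit big-endian integer;
+ # '>8xq' packs 8 zero bytes followed by a big-endian 64-bit integer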
+ iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
+ decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
+ self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
+ frag_content = AES.new(
+ decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
+ self._append_fragment(ctx, frag_content)
+ # We only download the first fragment during the test
+ if test:
+ break
+ i += 1
+ media_sequence += 1
+ elif line.startswith('#EXT-X-KEY'):
+ decrypt_url = decrypt_info.get('URI')
+ decrypt_info = parse_m3u8_attributes(line[11:])
+ if decrypt_info['METHOD'] == 'AES-128':
+ if 'IV' in decrypt_info:
+ decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32))
+ if not re.match(r'^https?://', decrypt_info['URI']):
+ decrypt_info['URI'] = compat_urlparse.urljoin(
+ man_url, decrypt_info['URI'])
+ if extra_query:
+ decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query)
+ if decrypt_url != decrypt_info['URI']:
+ decrypt_info['KEY'] = None
+ elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
+ media_sequence = int(line[22:])
+ elif line.startswith('#EXT-X-BYTERANGE'):
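+ # EXT-X-BYTERANGE has the form <length>[@<offset>]; when the offset is
+ # omitted the sub-range starts where the previous one ended. E.g.
+ # (illustrative) '#EXT-X-BYTERANGE:1234@5678' covers bytes 5678-6911.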
+ splitted_byte_range = line[17:].split('@')
+ sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end']
+ byte_range = {
+ 'start': sub_range_start,
+ 'end': sub_range_start + int(splitted_byte_range[0]),
+ }
+ elif is_ad_fragment_start(line):
+ ad_frag_next = True
+ elif is_ad_fragment_end(line):
+ ad_frag_next = False
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/youtube_dlc/downloader/http.py b/youtube_dlc/downloader/http.py
new file mode 100644
index 000000000..5046878df
--- /dev/null
+++ b/youtube_dlc/downloader/http.py
@@ -0,0 +1,354 @@
+from __future__ import unicode_literals
+
+import errno
+import os
+import socket
+import time
+import random
+import re
+
+from .common import FileDownloader
+from ..compat import (
+ compat_str,
+ compat_urllib_error,
+)
+from ..utils import (
+ ContentTooShortError,
+ encodeFilename,
+ int_or_none,
+ sanitize_open,
+ sanitized_Request,
+ write_xattr,
+ XAttrMetadataError,
+ XAttrUnavailableError,
+)
+
+
+class HttpFD(FileDownloader):
+ def real_download(self, filename, info_dict):
+ url = info_dict['url']
+
+ class DownloadContext(dict):
+ __getattr__ = dict.get
+ __setattr__ = dict.__setitem__
+ __delattr__ = dict.__delitem__
+
+ ctx = DownloadContext()
+ ctx.filename = filename
+ ctx.tmpfilename = self.temp_name(filename)
+ ctx.stream = None
+
+ # Do not include the Accept-Encoding header
+ headers = {'Youtubedl-no-compression': 'True'}
+ add_headers = info_dict.get('http_headers')
+ if add_headers:
+ headers.update(add_headers)
+
+ is_test = self.params.get('test', False)
+ chunk_size = self._TEST_FILE_SIZE if is_test else (
+ info_dict.get('downloader_options', {}).get('http_chunk_size')
+ or self.params.get('http_chunk_size') or 0)
+
+ ctx.open_mode = 'wb'
+ ctx.resume_len = 0
+ ctx.data_len = None
+ ctx.block_size = self.params.get('buffersize', 1024)
+ ctx.start_time = time.time()
+ ctx.chunk_size = None
+
+ if self.params.get('continuedl', True):
+ # Establish possible resume length
+ if os.path.isfile(encodeFilename(ctx.tmpfilename)):
+ ctx.resume_len = os.path.getsize(
+ encodeFilename(ctx.tmpfilename))
+
+ ctx.is_resume = ctx.resume_len > 0
+
+ count = 0
+ retries = self.params.get('retries', 0)
+
+ class SucceedDownload(Exception):
+ pass
+
+ class RetryDownload(Exception):
+ def __init__(self, source_error):
+ self.source_error = source_error
+
+ class NextFragment(Exception):
+ pass
+
+ def set_range(req, start, end):
+ range_header = 'bytes=%d-' % start
+ if end:
+ range_header += compat_str(end)
+ req.add_header('Range', range_header)
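+
+ # Illustrative examples: set_range(req, 0, 1023) adds 'Range: bytes=0-1023',
+ # while set_range(req, 1024, None) adds the open-ended 'Range: bytes=1024-'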
+
+ def establish_connection():
+ ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size)
+ if not is_test and chunk_size else chunk_size)
+ if ctx.resume_len > 0:
+ range_start = ctx.resume_len
+ if ctx.is_resume:
+ self.report_resuming_byte(ctx.resume_len)
+ ctx.open_mode = 'ab'
+ elif ctx.chunk_size > 0:
+ range_start = 0
+ else:
+ range_start = None
+ ctx.is_resume = False
+ range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
+ if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
+ range_end = ctx.data_len - 1
+ has_range = range_start is not None
+ ctx.has_range = has_range
+ request = sanitized_Request(url, None, headers)
+ if has_range:
+ set_range(request, range_start, range_end)
+ # Establish connection
+ try:
+ ctx.data = self.ydl.urlopen(request)
+ # When trying to resume, the Content-Range HTTP header of the response has to be
+ # checked to match the value of the requested Range HTTP header. This is because
+ # some webservers don't support resuming and serve the whole file with no
+ # Content-Range set in the response despite the requested Range (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
+ if has_range:
+ content_range = ctx.data.headers.get('Content-Range')
+ if content_range:
+ content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
+ # Content-Range is present and matches requested Range, resume is possible
+ if content_range_m:
+ if range_start == int(content_range_m.group(1)):
+ content_range_end = int_or_none(content_range_m.group(2))
+ content_len = int_or_none(content_range_m.group(3))
+ accept_content_len = (
+ # Non-chunked download
+ not ctx.chunk_size
+ # Chunked download and requested piece or
+ # its part is promised to be served
+ or content_range_end == range_end
+ or content_len < range_end)
+ if accept_content_len:
+ ctx.data_len = content_len
+ return
+ # Content-Range is either not present or invalid. Assuming the remote
+ # webserver is trying to send the whole file, resume is not possible, so
+ # wipe the local file and perform an entire redownload
+ self.report_unable_to_resume()
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ ctx.data_len = int_or_none(ctx.data.info().get('Content-length', None))
+ return
+ except (compat_urllib_error.HTTPError, ) as err:
+ if err.code == 416:
+ # Unable to resume (requested range not satisfiable)
+ try:
+ # Open the connection again without the range header
+ ctx.data = self.ydl.urlopen(
+ sanitized_Request(url, None, headers))
+ content_length = ctx.data.info()['Content-Length']
+ except (compat_urllib_error.HTTPError, ) as err:
+ if err.code < 500 or err.code >= 600:
+ raise
+ else:
+ # Examine the reported length
+ if (content_length is not None
+ and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
+ # The file had already been fully downloaded.
+ # Explanation of the above condition: in issue #175 it was revealed that
+ # YouTube sometimes adds or removes a few bytes from the end of the file,
+ # changing the file size slightly and causing problems for some users. So
+ # I decided to implement a suggested change and consider the file
+ # completely downloaded if the size on disk differs by less than 100 bytes
+ # from the one reported by the server.
+ self.report_file_already_downloaded(ctx.filename)
+ self.try_rename(ctx.tmpfilename, ctx.filename)
+ self._hook_progress({
+ 'filename': ctx.filename,
+ 'status': 'finished',
+ 'downloaded_bytes': ctx.resume_len,
+ 'total_bytes': ctx.resume_len,
+ })
+ raise SucceedDownload()
+ else:
+ # The length does not match; start the download over
+ self.report_unable_to_resume()
+ ctx.resume_len = 0
+ ctx.open_mode = 'wb'
+ return
+ elif err.code < 500 or err.code >= 600:
+ # Unexpected HTTP error
+ raise
+ raise RetryDownload(err)
+ except socket.error as err:
+ if err.errno != errno.ECONNRESET:
+ # Re-raise anything other than a connection reset;
+ # a reset is harmless and is simply retried below
+ raise
+ raise RetryDownload(err)
+
+ def download():
+ data_len = ctx.data.info().get('Content-length', None)
+
+ # The Range HTTP header may be ignored or unsupported by a webserver
+ # (e.g. extractor/scivee.py, extractor/bambuser.py).
+ # However, for a test we still would like to download just a piece of a file.
+ # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
+ # block size when downloading a file.
+ if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
+ data_len = self._TEST_FILE_SIZE
+
+ if data_len is not None:
+ data_len = int(data_len) + ctx.resume_len
+ min_data_len = self.params.get('min_filesize')
+ max_data_len = self.params.get('max_filesize')
+ if min_data_len is not None and data_len < min_data_len:
+ self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+ return False
+ if max_data_len is not None and data_len > max_data_len:
+ self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+ return False
+
+ byte_counter = 0 + ctx.resume_len
+ block_size = ctx.block_size
+ start = time.time()
+
+ # measure time over the whole while-loop, so slow_down() and best_block_size() work together properly
+ now = None # needed for slow_down() in the first loop run
+ before = start # start measuring
+
+ def retry(e):
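+ # Re-read how much actually reached the disk so the next attempt resumes
+ # at the right offset; when writing to stdout only the in-memory counter
+ # is available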
+ to_stdout = ctx.tmpfilename == '-'
+ if not to_stdout:
+ ctx.stream.close()
+ ctx.stream = None
+ ctx.resume_len = byte_counter if to_stdout else os.path.getsize(encodeFilename(ctx.tmpfilename))
+ raise RetryDownload(e)
+
+ while True:
+ try:
+ # Download and write
+ data_block = ctx.data.read(block_size if data_len is None else min(block_size, data_len - byte_counter))
+ # socket.timeout is a subclass of socket.error but may not have
+ # errno set
+ except socket.timeout as e:
+ retry(e)
+ except socket.error as e:
+ if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
+ raise
+ retry(e)
+
+ byte_counter += len(data_block)
+
+ # exit loop when download is finished
+ if len(data_block) == 0:
+ break
+
+ # Open destination file just in time
+ if ctx.stream is None:
+ try:
+ ctx.stream, ctx.tmpfilename = sanitize_open(
+ ctx.tmpfilename, ctx.open_mode)
+ assert ctx.stream is not None
+ ctx.filename = self.undo_temp_name(ctx.tmpfilename)
+ self.report_destination(ctx.filename)
+ except (OSError, IOError) as err:
+ self.report_error('unable to open for writing: %s' % str(err))
+ return False
+
+ if self.params.get('xattr_set_filesize', False) and data_len is not None:
+ try:
+ write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
+ except (XAttrUnavailableError, XAttrMetadataError) as err:
+ self.report_error('unable to set filesize xattr: %s' % str(err))
+
+ try:
+ ctx.stream.write(data_block)
+ except (IOError, OSError) as err:
+ self.to_stderr('\n')
+ self.report_error('unable to write data: %s' % str(err))
+ return False
+
+ # Apply rate limit
+ self.slow_down(start, now, byte_counter - ctx.resume_len)
+
+ # end measuring of one loop run
+ now = time.time()
+ after = now
+
+ # Adjust block size
+ if not self.params.get('noresizebuffer', False):
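+ # best_block_size aims at roughly one second of data per read,
+ # clamped between half and double the previous block (4 MB at most)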
+ block_size = self.best_block_size(after - before, len(data_block))
+
+ before = after
+
+ # Progress message
+ speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
+ if ctx.data_len is None:
+ eta = None
+ else:
+ eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len)
+
+ self._hook_progress({
+ 'status': 'downloading',
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': ctx.data_len,
+ 'tmpfilename': ctx.tmpfilename,
+ 'filename': ctx.filename,
+ 'eta': eta,
+ 'speed': speed,
+ 'elapsed': now - ctx.start_time,
+ })
+
+ if data_len is not None and byte_counter == data_len:
+ break
+
+ if not is_test and ctx.chunk_size and ctx.data_len is not None and byte_counter < ctx.data_len:
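+ # Chunked mode and the file is not complete yet: remember the offset
+ # and let the outer loop request the next byte range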
+ ctx.resume_len = byte_counter
+ # ctx.block_size = block_size
+ raise NextFragment()
+
+ if ctx.stream is None:
+ self.to_stderr('\n')
+ self.report_error('Did not get any data blocks')
+ return False
+ if ctx.tmpfilename != '-':
+ ctx.stream.close()
+
+ if data_len is not None and byte_counter != data_len:
+ err = ContentTooShortError(byte_counter, int(data_len))
+ if count <= retries:
+ retry(err)
+ raise err
+
+ self.try_rename(ctx.tmpfilename, ctx.filename)
+
+ # Update file modification time
+ if self.params.get('updatetime', True):
+ info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))
+
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': byte_counter,
+ 'filename': ctx.filename,
+ 'status': 'finished',
+ 'elapsed': time.time() - ctx.start_time,
+ })
+
+ return True
+
+ while count <= retries:
+ try:
+ establish_connection()
+ return download()
+ except RetryDownload as e:
+ count += 1
+ if count <= retries:
+ self.report_retry(e.source_error, count, retries)
+ continue
+ except NextFragment:
+ continue
+ except SucceedDownload:
+ return True
+
+ self.report_error('giving up after %s retries' % retries)
+ return False
diff --git a/youtube_dl/downloader/ism.py b/youtube_dlc/downloader/ism.py
index 1ca666b4a..1ca666b4a 100644
--- a/youtube_dl/downloader/ism.py
+++ b/youtube_dlc/downloader/ism.py
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dlc/downloader/rtmp.py
index fbb7f51b0..fbb7f51b0 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dlc/downloader/rtmp.py
diff --git a/youtube_dl/downloader/rtsp.py b/youtube_dlc/downloader/rtsp.py
index 939358b2a..939358b2a 100644
--- a/youtube_dl/downloader/rtsp.py
+++ b/youtube_dlc/downloader/rtsp.py
diff --git a/youtube_dlc/downloader/youtube_live_chat.py b/youtube_dlc/downloader/youtube_live_chat.py
new file mode 100644
index 000000000..4932dd9c5
--- /dev/null
+++ b/youtube_dlc/downloader/youtube_live_chat.py
@@ -0,0 +1,94 @@
+from __future__ import division, unicode_literals
+
+import re
+import json
+
+from .fragment import FragmentFD
+
+
+class YoutubeLiveChatReplayFD(FragmentFD):
+ """ Downloads YouTube live chat replays fragment by fragment """
+
+ FD_NAME = 'youtube_live_chat_replay'
+
+ def real_download(self, filename, info_dict):
+ video_id = info_dict['video_id']
+ self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
+
+ test = self.params.get('test', False)
+
+ ctx = {
+ 'filename': filename,
+ 'live': True,
+ 'total_frags': None,
+ }
+
+ def dl_fragment(url):
+ headers = info_dict.get('http_headers', {})
+ return self._download_fragment(ctx, url, info_dict, headers)
+
+ def parse_yt_initial_data(data):
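+ # ytInitialData is embedded either as window["ytInitialData"] = {...};
+ # or var ytInitialData = {...}; the (?<=}) lookbehind stops the lazy
+ # match at the closing brace of the JSON object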
+ window_patt = br'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});'
+ var_patt = br'var\s+ytInitialData\s*=\s*(.*?)(?<=});'
+ for patt in window_patt, var_patt:
+ try:
+ raw_json = re.search(patt, data).group(1)
+ return json.loads(raw_json)
+ except AttributeError:
+ continue
+
+ self._prepare_and_start_frag_download(ctx)
+
+ success, raw_fragment = dl_fragment(
+ 'https://www.youtube.com/watch?v={}'.format(video_id))
+ if not success:
+ return False
+ data = parse_yt_initial_data(raw_fragment)
+ continuation_id = data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ # no data to write yet, but _append_fragment has to be called to initialize the download
+ self._append_fragment(ctx, b'')
+
+ first = True
+ offset = None
+ while continuation_id is not None:
+ data = None
+ if first:
+ url = 'https://www.youtube.com/live_chat_replay?continuation={}'.format(continuation_id)
+ success, raw_fragment = dl_fragment(url)
+ if not success:
+ return False
+ data = parse_yt_initial_data(raw_fragment)
+ else:
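+ # Rewind the player offset by 5 seconds, presumably so chat items
+ # near the fragment boundary are not skipped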
+ url = ('https://www.youtube.com/live_chat_replay/get_live_chat_replay'
+ + '?continuation={}'.format(continuation_id)
+ + '&playerOffsetMs={}'.format(offset - 5000)
+ + '&hidden=false'
+ + '&pbj=1')
+ success, raw_fragment = dl_fragment(url)
+ if not success:
+ return False
+ data = json.loads(raw_fragment)['response']
+
+ first = False
+ continuation_id = None
+
+ live_chat_continuation = data['continuationContents']['liveChatContinuation']
+ offset = None
+ processed_fragment = bytearray()
+ if 'actions' in live_chat_continuation:
+ for action in live_chat_continuation['actions']:
+ if 'replayChatItemAction' in action:
+ replay_chat_item_action = action['replayChatItemAction']
+ offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
+ processed_fragment.extend(
+ json.dumps(action, ensure_ascii=False).encode('utf-8') + b'\n')
+ continuation_id = live_chat_continuation['continuations'][0]['liveChatReplayContinuationData']['continuation']
+
+ self._append_fragment(ctx, processed_fragment)
+
+ if test or offset is None:
+ break
+
+ self._finish_frag_download(ctx)
+
+ return True
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dlc/extractor/__init__.py
index 18d8dbcd6..18d8dbcd6 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dlc/extractor/__init__.py
diff --git a/youtube_dlc/extractor/abc.py b/youtube_dlc/extractor/abc.py
new file mode 100644
index 000000000..6637f4f35
--- /dev/null
+++ b/youtube_dlc/extractor/abc.py
@@ -0,0 +1,193 @@
+from __future__ import unicode_literals
+
+import hashlib
+import hmac
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ js_to_json,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+)
+
+
+class ABCIE(InfoExtractor):
+ IE_NAME = 'abc.net.au'
+ _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/news/(?:[^/]+/){1,2}(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
+ 'md5': 'cb3dd03b18455a661071ee1e28344d9f',
+ 'info_dict': {
+ 'id': '5868334',
+ 'ext': 'mp4',
+ 'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
+ 'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
+ },
+ 'skip': 'this video has expired',
+ }, {
+ 'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
+ 'md5': 'db2a5369238b51f9811ad815b69dc086',
+ 'info_dict': {
+ 'id': 'NvqvPeNZsHU',
+ 'ext': 'mp4',
+ 'upload_date': '20150816',
+ 'uploader': 'ABC News (Australia)',
+ 'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
+ 'uploader_id': 'NewsOnABC',
+ 'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
+ },
+ 'add_ie': ['Youtube'],
+ 'skip': 'Not accessible from Travis CI server',
+ }, {
+ 'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
+ 'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
+ 'info_dict': {
+ 'id': '6880080',
+ 'ext': 'mp3',
+ 'title': 'NAB lifts interest rates, following Westpac and CBA',
+ 'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
+ },
+ }, {
+ 'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ mobj = re.search(
+ r'inline(?P<type>Video|Audio|YouTube)Data\.push\((?P<json_data>[^)]+)\);',
+ webpage)
+ if mobj is None:
+ expired = self._html_search_regex(r'(?s)class="expired-(?:video|audio)".+?<span>(.+?)</span>', webpage, 'expired', None)
+ if expired:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, expired), expected=True)
+ raise ExtractorError('Unable to extract video urls')
+
+ urls_info = self._parse_json(
+ mobj.group('json_data'), video_id, transform_source=js_to_json)
+
+ if not isinstance(urls_info, list):
+ urls_info = [urls_info]
+
+ if mobj.group('type') == 'YouTube':
+ return self.playlist_result([
+ self.url_result(url_info['url']) for url_info in urls_info])
+
+ formats = [{
+ 'url': url_info['url'],
+ 'vcodec': url_info.get('codec') if mobj.group('type') == 'Video' else 'none',
+ 'width': int_or_none(url_info.get('width')),
+ 'height': int_or_none(url_info.get('height')),
+ 'tbr': int_or_none(url_info.get('bitrate')),
+ 'filesize': int_or_none(url_info.get('filesize')),
+ } for url_info in urls_info]
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._og_search_title(webpage),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ }
+
+
+class ABCIViewIE(InfoExtractor):
+ IE_NAME = 'abc.net.au:iview'
+ _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
+ _GEO_COUNTRIES = ['AU']
+
+ # ABC iview programs are normally available for 14 days only.
+ _TESTS = [{
+ 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00',
+ 'md5': '67715ce3c78426b11ba167d875ac6abf',
+ 'info_dict': {
+ 'id': 'LE1927H001S00',
+ 'ext': 'mp4',
+ 'title': "Series 11 Ep 1",
+ 'series': "Gruen",
+ 'description': 'md5:52cc744ad35045baf6aded2ce7287f67',
+ 'upload_date': '20190925',
+ 'uploader_id': 'abc1',
+ 'timestamp': 1569445289,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_params = self._download_json(
+ 'https://iview.abc.net.au/api/programs/' + video_id, video_id)
+ title = unescapeHTML(video_params.get('title') or video_params['seriesTitle'])
+ stream = next(s for s in video_params['playlist'] if s.get('type') in ('program', 'livestream'))
+
+ house_number = video_params.get('episodeHouseNumber') or video_id
+ path = '/auth/hls/sign?ts={0}&hn={1}&d=android-tablet'.format(
+ int(time.time()), house_number)
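+ # Sign the token request path with HMAC-SHA256; the key appears to be
+ # a constant taken from ABC's Android client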
+ sig = hmac.new(
+ b'android.content.res.Resources',
+ path.encode('utf-8'), hashlib.sha256).hexdigest()
+ token = self._download_webpage(
+ 'http://iview.abc.net.au{0}&sig={1}'.format(path, sig), video_id)
+
+ def tokenize_url(url, token):
+ return update_url_query(url, {
+ 'hdnea': token,
+ })
+
+ formats = []  # ensure defined even if no stream URL matches below
+ for sd in ('720', 'sd', 'sd-low'):
+ sd_url = try_get(
+ stream, lambda x: x['streams']['hls'][sd], compat_str)
+ if not sd_url:
+ continue
+ formats = self._extract_m3u8_formats(
+ tokenize_url(sd_url, token), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+ if formats:
+ break
+ self._sort_formats(formats)
+
+ subtitles = {}
+ src_vtt = stream.get('captions', {}).get('src-vtt')
+ if src_vtt:
+ subtitles['en'] = [{
+ 'url': src_vtt,
+ 'ext': 'vtt',
+ }]
+
+ is_live = video_params.get('livestream') == '1'
+ if is_live:
+ title = self._live_title(title)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_params.get('description'),
+ 'thumbnail': video_params.get('thumbnail'),
+ 'duration': int_or_none(video_params.get('eventDuration')),
+ 'timestamp': parse_iso8601(video_params.get('pubDate'), ' '),
+ 'series': unescapeHTML(video_params.get('seriesTitle')),
+ 'series_id': video_params.get('seriesHouseNumber') or video_id[:7],
+ 'season_number': int_or_none(self._search_regex(
+ r'\bSeries\s+(\d+)\b', title, 'season number', default=None)),
+ 'episode_number': int_or_none(self._search_regex(
+ r'\bEp\s+(\d+)\b', title, 'episode number', default=None)),
+ 'episode_id': house_number,
+ 'uploader_id': video_params.get('channel'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dlc/extractor/abcnews.py
index 8b407bf9c..8b407bf9c 100644
--- a/youtube_dl/extractor/abcnews.py
+++ b/youtube_dlc/extractor/abcnews.py
diff --git a/youtube_dlc/extractor/abcotvs.py b/youtube_dlc/extractor/abcotvs.py
new file mode 100644
index 000000000..0bc69a64f
--- /dev/null
+++ b/youtube_dlc/extractor/abcotvs.py
@@ -0,0 +1,137 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ dict_get,
+ int_or_none,
+ try_get,
+)
+
+
+class ABCOTVSIE(InfoExtractor):
+ IE_NAME = 'abcotvs'
+ IE_DESC = 'ABC Owned Television Stations'
+ _VALID_URL = r'https?://(?P<site>abc(?:7(?:news|ny|chicago)?|11|13|30)|6abc)\.com(?:(?:/[^/]+)*/(?P<display_id>[^/]+))?/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'http://abc7news.com/entertainment/east-bay-museum-celebrates-vintage-synthesizers/472581/',
+ 'info_dict': {
+ 'id': '472548',
+ 'display_id': 'east-bay-museum-celebrates-vintage-synthesizers',
+ 'ext': 'mp4',
+ 'title': 'East Bay museum celebrates synthesized music',
+ 'description': 'md5:24ed2bd527096ec2a5c67b9d5a9005f3',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1421118520,
+ 'upload_date': '20150113',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://abc7news.com/472581',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://6abc.com/man-75-killed-after-being-struck-by-vehicle-in-chester/5725182/',
+ 'only_matching': True,
+ },
+ ]
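+ # Map the site slug from the URL to the station call sign used by the API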
+ _SITE_MAP = {
+ '6abc': 'wpvi',
+ 'abc11': 'wtvd',
+ 'abc13': 'ktrk',
+ 'abc30': 'kfsn',
+ 'abc7': 'kabc',
+ 'abc7chicago': 'wls',
+ 'abc7news': 'kgo',
+ 'abc7ny': 'wabc',
+ }
+
+ def _real_extract(self, url):
+ site, display_id, video_id = re.match(self._VALID_URL, url).groups()
+ display_id = display_id or video_id
+ station = self._SITE_MAP[site]
+
+ data = self._download_json(
+ 'https://api.abcotvs.com/v2/content', display_id, query={
+ 'id': video_id,
+ 'key': 'otv.web.%s.story' % station,
+ 'station': station,
+ })['data']
+ video = try_get(data, lambda x: x['featuredMedia']['video'], dict) or data
+ video_id = compat_str(dict_get(video, ('id', 'publishedKey'), video_id))
+ title = video.get('title') or video['linkText']
+
+ formats = []
+ m3u8_url = video.get('m3u8')
+ if m3u8_url:
+ formats = self._extract_m3u8_formats(
+ video['m3u8'].split('?')[0], display_id, 'mp4', m3u8_id='hls', fatal=False)
+ mp4_url = video.get('mp4')
+ if mp4_url:
+ formats.append({
+ 'abr': 128,
+ 'format_id': 'https',
+ 'height': 360,
+ 'url': mp4_url,
+ 'width': 640,
+ })
+ self._sort_formats(formats)
+
+ image = video.get('image') or {}
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': dict_get(video, ('description', 'caption'), try_get(video, lambda x: x['meta']['description'])),
+ 'thumbnail': dict_get(image, ('source', 'dynamicSource')),
+ 'timestamp': int_or_none(video.get('date')),
+ 'duration': int_or_none(video.get('length')),
+ 'formats': formats,
+ }
+
+
+class ABCOTVSClipsIE(InfoExtractor):
+ IE_NAME = 'abcotvs:clips'
+ _VALID_URL = r'https?://clips\.abcotvs\.com/(?:[^/]+/)*video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://clips.abcotvs.com/kabc/video/214814',
+ 'info_dict': {
+ 'id': '214814',
+ 'ext': 'mp4',
+ 'title': 'SpaceX launch pad explosion destroys rocket, satellite',
+ 'description': 'md5:9f186e5ad8f490f65409965ee9c7be1b',
+ 'upload_date': '20160901',
+ 'timestamp': 1472756695,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json('https://clips.abcotvs.com/vogo/video/getByIds?ids=' + video_id, video_id)['results'][0]
+ title = video_data['title']
+ formats = self._extract_m3u8_formats(
+ video_data['videoURL'].split('?')[0], video_id, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnailURL'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': int_or_none(video_data.get('pubDate')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/academicearth.py b/youtube_dlc/extractor/academicearth.py
index 34095501c..34095501c 100644
--- a/youtube_dl/extractor/academicearth.py
+++ b/youtube_dlc/extractor/academicearth.py
diff --git a/youtube_dl/extractor/acast.py b/youtube_dlc/extractor/acast.py
index b17c792d2..b17c792d2 100644
--- a/youtube_dl/extractor/acast.py
+++ b/youtube_dlc/extractor/acast.py
diff --git a/youtube_dl/extractor/adn.py b/youtube_dlc/extractor/adn.py
index c95ad2173..c95ad2173 100644
--- a/youtube_dl/extractor/adn.py
+++ b/youtube_dlc/extractor/adn.py
diff --git a/youtube_dl/extractor/adobeconnect.py b/youtube_dlc/extractor/adobeconnect.py
index 728549eb9..728549eb9 100644
--- a/youtube_dl/extractor/adobeconnect.py
+++ b/youtube_dlc/extractor/adobeconnect.py
diff --git a/youtube_dl/extractor/adobepass.py b/youtube_dlc/extractor/adobepass.py
index 38dca1b0a..38dca1b0a 100644
--- a/youtube_dl/extractor/adobepass.py
+++ b/youtube_dlc/extractor/adobepass.py
diff --git a/youtube_dlc/extractor/adobetv.py b/youtube_dlc/extractor/adobetv.py
new file mode 100644
index 000000000..80060f037
--- /dev/null
+++ b/youtube_dlc/extractor/adobetv.py
@@ -0,0 +1,288 @@
+from __future__ import unicode_literals
+
+import functools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ ISO639Utils,
+ OnDemandPagedList,
+ parse_duration,
+ str_or_none,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class AdobeTVBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, query, note=None):
+ return self._download_json(
+ 'http://tv.adobe.com/api/v4/' + path,
+ video_id, note, query=query)['data']
+
+ def _parse_subtitles(self, video_data, url_key):
+ subtitles = {}
+ for translation in video_data.get('translations', []):
+ vtt_path = translation.get(url_key)
+ if not vtt_path:
+ continue
+ lang = translation.get('language_w3c') or ISO639Utils.long2short(translation['language_medium'])
+ subtitles.setdefault(lang, []).append({
+ 'ext': 'vtt',
+ 'url': vtt_path,
+ })
+ return subtitles
+
+ def _parse_video_data(self, video_data):
+ video_id = compat_str(video_data['id'])
+ title = video_data['title']
+
+ s3_extracted = False
+ formats = []
+ for source in video_data.get('videos', []):
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ f = {
+ 'format_id': source.get('quality_level'),
+ 'fps': int_or_none(source.get('frame_rate')),
+ 'height': int_or_none(source.get('height')),
+ 'tbr': int_or_none(source.get('video_data_rate')),
+ 'width': int_or_none(source.get('width')),
+ 'url': source_url,
+ }
+ original_filename = source.get('original_filename')
+ if original_filename:
+ if not (f.get('height') and f.get('width')):
+ mobj = re.search(r'_(\d+)x(\d+)', original_filename)
+ if mobj:
+ f.update({
+ 'height': int(mobj.group(2)),
+ 'width': int(mobj.group(1)),
+ })
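+ # An s3:// original_filename can also be fetched directly over HTTPS
+ # via the public S3 endpoint; add that variant only once per video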
+ if original_filename.startswith('s3://') and not s3_extracted:
+ formats.append({
+ 'format_id': 'original',
+ 'preference': 1,
+ 'url': original_filename.replace('s3://', 'https://s3.amazonaws.com/'),
+ })
+ s3_extracted = True
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('thumbnail'),
+ 'upload_date': unified_strdate(video_data.get('start_date')),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'view_count': str_to_int(video_data.get('playcount')),
+ 'formats': formats,
+ 'subtitles': self._parse_subtitles(video_data, 'vtt'),
+ }
+
+
+class AdobeTVEmbedIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv:embed'
+ _VALID_URL = r'https?://tv\.adobe\.com/embed/\d+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://tv.adobe.com/embed/22/4153',
+ 'md5': 'c8c0461bf04d54574fc2b4d07ac6783a',
+ 'info_dict': {
+ 'id': '4153',
+ 'ext': 'flv',
+ 'title': 'Creating Graphics Optimized for BlackBerry',
+ 'description': 'md5:eac6e8dced38bdaae51cd94447927459',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20091109',
+ 'duration': 377,
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._call_api(
+ 'episode/' + video_id, video_id, {'disclosure': 'standard'})[0]
+ return self._parse_video_data(video_data)
+
+
+class AdobeTVIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv'
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?watch/(?P<show_urlname>[^/]+)/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/watch/the-complete-picture-with-julieanne-kost/quick-tip-how-to-draw-a-circle-around-an-object-in-photoshop/',
+ 'md5': '9bc5727bcdd55251f35ad311ca74fa1e',
+ 'info_dict': {
+ 'id': '10981',
+ 'ext': 'mp4',
+ 'title': 'Quick Tip - How to Draw a Circle Around an Object in Photoshop',
+ 'description': 'md5:99ec318dc909d7ba2a1f2b038f7d2311',
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'upload_date': '20110914',
+ 'duration': 60,
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ language, show_urlname, urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+
+ video_data = self._call_api(
+ 'episode/get', urlname, {
+ 'disclosure': 'standard',
+ 'language': language,
+ 'show_urlname': show_urlname,
+ 'urlname': urlname,
+ })[0]
+ return self._parse_video_data(video_data)
+
+
+class AdobeTVPlaylistBaseIE(AdobeTVBaseIE):
+ _PAGE_SIZE = 25
+
+ def _fetch_page(self, display_id, query, page):
+ page += 1
+ query['page'] = page
+ for element_data in self._call_api(
+ self._RESOURCE, display_id, query, 'Download Page %d' % page):
+ yield self._process_data(element_data)
+
+ def _extract_playlist_entries(self, display_id, query):
+ return OnDemandPagedList(functools.partial(
+ self._fetch_page, display_id, query), self._PAGE_SIZE)
+
+
+class AdobeTVShowIE(AdobeTVPlaylistBaseIE):
+ IE_NAME = 'adobetv:show'
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?show/(?P<id>[^/]+)'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/show/the-complete-picture-with-julieanne-kost',
+ 'info_dict': {
+ 'id': '36',
+ 'title': 'The Complete Picture with Julieanne Kost',
+ 'description': 'md5:fa50867102dcd1aa0ddf2ab039311b27',
+ },
+ 'playlist_mincount': 136,
+ }
+ _RESOURCE = 'episode'
+ _process_data = AdobeTVBaseIE._parse_video_data
+
+ def _real_extract(self, url):
+ language, show_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = {
+ 'disclosure': 'standard',
+ 'language': language,
+ 'show_urlname': show_urlname,
+ }
+
+ show_data = self._call_api(
+ 'show/get', show_urlname, query)[0]
+
+ return self.playlist_result(
+ self._extract_playlist_entries(show_urlname, query),
+ str_or_none(show_data.get('id')),
+ show_data.get('show_name'),
+ show_data.get('show_description'))
+
+
+class AdobeTVChannelIE(AdobeTVPlaylistBaseIE):
+ IE_NAME = 'adobetv:channel'
+ _VALID_URL = r'https?://tv\.adobe\.com/(?:(?P<language>fr|de|es|jp)/)?channel/(?P<id>[^/]+)(?:/(?P<category_urlname>[^/]+))?'
+
+ _TEST = {
+ 'url': 'http://tv.adobe.com/channel/development',
+ 'info_dict': {
+ 'id': 'development',
+ },
+ 'playlist_mincount': 96,
+ }
+ _RESOURCE = 'show'
+
+ def _process_data(self, show_data):
+ return self.url_result(
+ show_data['url'], 'AdobeTVShow', str_or_none(show_data.get('id')))
+
+ def _real_extract(self, url):
+ language, channel_urlname, category_urlname = re.match(self._VALID_URL, url).groups()
+ if not language:
+ language = 'en'
+ query = {
+ 'channel_urlname': channel_urlname,
+ 'language': language,
+ }
+ if category_urlname:
+ query['category_urlname'] = category_urlname
+
+ return self.playlist_result(
+ self._extract_playlist_entries(channel_urlname, query),
+ channel_urlname)
+
+
+class AdobeTVVideoIE(AdobeTVBaseIE):
+ IE_NAME = 'adobetv:video'
+ _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
+
+ _TEST = {
+ # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
+ 'url': 'https://video.tv.adobe.com/v/2456/',
+ 'md5': '43662b577c018ad707a63766462b1e87',
+ 'info_dict': {
+ 'id': '2456',
+ 'ext': 'mp4',
+ 'title': 'New experience with Acrobat DC',
+ 'description': 'New experience with Acrobat DC',
+ 'duration': 248.667,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ video_data = self._parse_json(self._search_regex(
+ r'var\s+bridge\s*=\s*([^;]+);', webpage, 'bridged data'), video_id)
+ title = video_data['title']
+
+ formats = []
+ sources = video_data.get('sources') or []
+ for source in sources:
+ source_src = source.get('src')
+ if not source_src:
+ continue
+ formats.append({
+ 'filesize': int_or_none(source.get('kilobytes') or None, invscale=1000),
+ 'format_id': '-'.join(filter(None, [source.get('format'), source.get('label')])),
+ 'height': int_or_none(source.get('height') or None),
+ 'tbr': int_or_none(source.get('bitrate') or None),
+ 'width': int_or_none(source.get('width') or None),
+ 'url': source_src,
+ })
+ self._sort_formats(formats)
+
+ # The duration varies among formats, both in the metadata and in the
+ # downloaded files, so just pick the maximum
+ duration = max(filter(None, [
+ float_or_none(source.get('duration'), scale=1000)
+ for source in sources]))
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'thumbnail': video_data.get('video', {}).get('poster'),
+ 'duration': duration,
+ 'subtitles': self._parse_subtitles(video_data, 'vttPath'),
+ }
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dlc/extractor/adultswim.py
index 8d1d9ac7d..8d1d9ac7d 100644
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dlc/extractor/adultswim.py
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dlc/extractor/aenetworks.py
index 611b948f5..611b948f5 100644
--- a/youtube_dl/extractor/aenetworks.py
+++ b/youtube_dlc/extractor/aenetworks.py
diff --git a/youtube_dl/extractor/afreecatv.py b/youtube_dlc/extractor/afreecatv.py
index 6275e5209..6275e5209 100644
--- a/youtube_dl/extractor/afreecatv.py
+++ b/youtube_dlc/extractor/afreecatv.py
diff --git a/youtube_dl/extractor/airmozilla.py b/youtube_dlc/extractor/airmozilla.py
index 9e38136b4..9e38136b4 100644
--- a/youtube_dl/extractor/airmozilla.py
+++ b/youtube_dlc/extractor/airmozilla.py
diff --git a/youtube_dl/extractor/aliexpress.py b/youtube_dlc/extractor/aliexpress.py
index 6f241e683..6f241e683 100644
--- a/youtube_dl/extractor/aliexpress.py
+++ b/youtube_dlc/extractor/aliexpress.py
diff --git a/youtube_dl/extractor/aljazeera.py b/youtube_dlc/extractor/aljazeera.py
index c68be3134..c68be3134 100644
--- a/youtube_dl/extractor/aljazeera.py
+++ b/youtube_dlc/extractor/aljazeera.py
diff --git a/youtube_dl/extractor/allocine.py b/youtube_dlc/extractor/allocine.py
index cd533acfc..cd533acfc 100644
--- a/youtube_dl/extractor/allocine.py
+++ b/youtube_dlc/extractor/allocine.py
diff --git a/youtube_dl/extractor/alphaporno.py b/youtube_dlc/extractor/alphaporno.py
index 3a6d99f6b..3a6d99f6b 100644
--- a/youtube_dl/extractor/alphaporno.py
+++ b/youtube_dlc/extractor/alphaporno.py
diff --git a/youtube_dl/extractor/amcnetworks.py b/youtube_dlc/extractor/amcnetworks.py
index 6fb3d6c53..6fb3d6c53 100644
--- a/youtube_dl/extractor/amcnetworks.py
+++ b/youtube_dlc/extractor/amcnetworks.py
diff --git a/youtube_dlc/extractor/americastestkitchen.py b/youtube_dlc/extractor/americastestkitchen.py
new file mode 100644
index 000000000..9c9d77ae1
--- /dev/null
+++ b/youtube_dlc/extractor/americastestkitchen.py
@@ -0,0 +1,82 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ js_to_json,
+ try_get,
+ unified_strdate,
+)
+
+
+class AmericasTestKitchenIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?americastestkitchen\.com/(?:episode|videos)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.americastestkitchen.com/episode/582-weeknight-japanese-suppers',
+ 'md5': 'b861c3e365ac38ad319cfd509c30577f',
+ 'info_dict': {
+ 'id': '5b400b9ee338f922cb06450c',
+ 'title': 'Weeknight Japanese Suppers',
+ 'ext': 'mp4',
+ 'description': 'md5:3d0c1a44bb3b27607ce82652db25b4a8',
+ 'thumbnail': r're:^https?://',
+ 'timestamp': 1523664000,
+ 'upload_date': '20180414',
+ 'release_date': '20180414',
+ 'series': "America's Test Kitchen",
+ 'season_number': 18,
+ 'episode': 'Weeknight Japanese Suppers',
+ 'episode_number': 15,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.americastestkitchen.com/videos/3420-pan-seared-salmon',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_data = self._parse_json(
+ self._search_regex(
+ r'window\.__INITIAL_STATE__\s*=\s*({.+?})\s*;\s*</script>',
+ webpage, 'initial context'),
+ video_id, js_to_json)
+
+ ep_data = try_get(
+ video_data,
+ (lambda x: x['episodeDetail']['content']['data'],
+ lambda x: x['videoDetail']['content']['data']), dict)
+ ep_meta = ep_data.get('full_video', {})
+
+ zype_id = ep_data.get('zype_id') or ep_meta['zype_id']
+
+ title = ep_data.get('title') or ep_meta.get('title')
+ description = clean_html(ep_meta.get('episode_description') or ep_data.get(
+ 'description') or ep_meta.get('description'))
+ thumbnail = try_get(ep_meta, lambda x: x['photo']['image_url'])
+ release_date = unified_strdate(ep_data.get('aired_at'))
+
+ season_number = int_or_none(ep_meta.get('season_number'))
+ episode = ep_meta.get('title')
+ episode_number = int_or_none(ep_meta.get('episode_number'))
+
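+ # Hand playback off to the Zype embed player; url_transparent keeps
+ # the metadata extracted here on top of whatever Zype provides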
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://player.zype.com/embed/%s.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ' % zype_id,
+ 'ie_key': 'Zype',
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'release_date': release_date,
+ 'series': "America's Test Kitchen",
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ }
diff --git a/youtube_dl/extractor/amp.py b/youtube_dlc/extractor/amp.py
index 7ff098cfa..7ff098cfa 100644
--- a/youtube_dl/extractor/amp.py
+++ b/youtube_dlc/extractor/amp.py
diff --git a/youtube_dl/extractor/animeondemand.py b/youtube_dlc/extractor/animeondemand.py
index 00ce684d1..00ce684d1 100644
--- a/youtube_dl/extractor/animeondemand.py
+++ b/youtube_dlc/extractor/animeondemand.py
diff --git a/youtube_dl/extractor/anvato.py b/youtube_dlc/extractor/anvato.py
index 84e841035..84e841035 100644
--- a/youtube_dl/extractor/anvato.py
+++ b/youtube_dlc/extractor/anvato.py
diff --git a/youtube_dl/extractor/aol.py b/youtube_dlc/extractor/aol.py
index e87994a6a..e87994a6a 100644
--- a/youtube_dl/extractor/aol.py
+++ b/youtube_dlc/extractor/aol.py
diff --git a/youtube_dl/extractor/apa.py b/youtube_dlc/extractor/apa.py
index 98ccdaa4a..98ccdaa4a 100644
--- a/youtube_dl/extractor/apa.py
+++ b/youtube_dlc/extractor/apa.py
diff --git a/youtube_dl/extractor/aparat.py b/youtube_dlc/extractor/aparat.py
index 883dcee7a..883dcee7a 100644
--- a/youtube_dl/extractor/aparat.py
+++ b/youtube_dlc/extractor/aparat.py
diff --git a/youtube_dl/extractor/appleconnect.py b/youtube_dlc/extractor/appleconnect.py
index a84b8b1eb..a84b8b1eb 100644
--- a/youtube_dl/extractor/appleconnect.py
+++ b/youtube_dlc/extractor/appleconnect.py
diff --git a/youtube_dlc/extractor/appletrailers.py b/youtube_dlc/extractor/appletrailers.py
new file mode 100644
index 000000000..b5ed2b88b
--- /dev/null
+++ b/youtube_dlc/extractor/appletrailers.py
@@ -0,0 +1,283 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ unified_strdate,
+)
+
+
+class AppleTrailersIE(InfoExtractor):
+ IE_NAME = 'appletrailers'
+ _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
+ 'info_dict': {
+ 'id': '5111',
+ 'title': 'Man of Steel',
+ },
+ 'playlist': [
+ {
+ 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8',
+ 'info_dict': {
+ 'id': 'manofsteel-trailer4',
+ 'ext': 'mov',
+ 'duration': 111,
+ 'title': 'Trailer 4',
+ 'upload_date': '20130523',
+ 'uploader_id': 'wb',
+ },
+ },
+ {
+ 'md5': 'b8017b7131b721fb4e8d6f49e1df908c',
+ 'info_dict': {
+ 'id': 'manofsteel-trailer3',
+ 'ext': 'mov',
+ 'duration': 182,
+ 'title': 'Trailer 3',
+ 'upload_date': '20130417',
+ 'uploader_id': 'wb',
+ },
+ },
+ {
+ 'md5': 'd0f1e1150989b9924679b441f3404d48',
+ 'info_dict': {
+ 'id': 'manofsteel-trailer',
+ 'ext': 'mov',
+ 'duration': 148,
+ 'title': 'Trailer',
+ 'upload_date': '20121212',
+ 'uploader_id': 'wb',
+ },
+ },
+ {
+ 'md5': '5fe08795b943eb2e757fa95cb6def1cb',
+ 'info_dict': {
+ 'id': 'manofsteel-teaser',
+ 'ext': 'mov',
+ 'duration': 93,
+ 'title': 'Teaser',
+ 'upload_date': '20120721',
+ 'uploader_id': 'wb',
+ },
+ },
+ ]
+ }, {
+ 'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
+ 'info_dict': {
+ 'id': '4489',
+ 'title': 'Blackthorn',
+ },
+ 'playlist_mincount': 2,
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
+ 'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
+ 'info_dict': {
+ 'id': '15881',
+ 'title': 'Kung Fu Panda 3',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
+ 'only_matching': True,
+ }]
+
+ _JSON_RE = r'iTunes\.playURL\((.*?)\);'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ movie = mobj.group('movie')
+ uploader_id = mobj.group('company')
+
+ webpage = self._download_webpage(url, movie)
+ film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
+ film_data = self._download_json(
+ 'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
+ film_id, fatal=False)
+
+ if film_data:
+ entries = []
+ for clip in film_data.get('clips', []):
+ clip_title = clip['title']
+
+ formats = []
+ for version, version_data in clip.get('versions', {}).items():
+ for size, size_data in version_data.get('sizes', {}).items():
+ src = size_data.get('src')
+ if not src:
+ continue
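+ # Rewrite e.g. ..._720p.mov to ..._h720p.mov, which points to the
+ # actual downloadable video file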
+ formats.append({
+ 'format_id': '%s-%s' % (version, size),
+ 'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
+ 'width': int_or_none(size_data.get('width')),
+ 'height': int_or_none(size_data.get('height')),
+ 'language': version[:2],
+ })
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
+ 'formats': formats,
+ 'title': clip_title,
+ 'thumbnail': clip.get('screen') or clip.get('thumb'),
+ 'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
+ 'upload_date': unified_strdate(clip.get('posted')),
+ 'uploader_id': uploader_id,
+ })
+
+ page_data = film_data.get('page', {})
+ return self.playlist_result(entries, film_id, page_data.get('movie_title'))
+
+ playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+
+ def fix_html(s):
+ s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
+ s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
+ # The ' in the onClick attributes is not escaped, so pages like
+ # http://trailers.apple.com/trailers/wb/gravity/ couldn't be parsed
+
+ def _clean_json(m):
+ return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+ s = re.sub(self._JSON_RE, _clean_json, s)
+ s = '<html>%s</html>' % s
+ return s
+ doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
+
+ playlist = []
+ for li in doc.findall('./div/ul/li'):
+ on_click = li.find('.//a').attrib['onClick']
+ trailer_info_json = self._search_regex(self._JSON_RE,
+ on_click, 'trailer info')
+ trailer_info = json.loads(trailer_info_json)
+ first_url = trailer_info.get('url')
+ if not first_url:
+ continue
+ title = trailer_info['title']
+ video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
+ thumbnail = li.find('.//img').attrib['src']
+ upload_date = trailer_info['posted'].replace('-', '')
+
+ runtime = trailer_info['runtime']
+ m = re.search(r'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime)
+ duration = None
+ if m:
+ duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
+
+ trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
+ settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
+ settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
+
+ formats = []
+ for format in settings['metadata']['sizes']:
+ # The src is a reference movie; rewrite it to point to the real video file
+ format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
+ formats.append({
+ 'url': format_url,
+ 'format': format['type'],
+ 'width': int_or_none(format['width']),
+ 'height': int_or_none(format['height']),
+ })
+
+ self._sort_formats(formats)
+
+ playlist.append({
+ '_type': 'video',
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'http_headers': {
+ 'User-Agent': 'QuickTime compatible (youtube-dlc)',
+ },
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': movie,
+ 'entries': playlist,
+ }
+
+
+class AppleTrailersSectionIE(InfoExtractor):
+ IE_NAME = 'appletrailers:section'
+ _SECTIONS = {
+ 'justadded': {
+ 'feed_path': 'just_added',
+ 'title': 'Just Added',
+ },
+ 'exclusive': {
+ 'feed_path': 'exclusive',
+ 'title': 'Exclusive',
+ },
+ 'justhd': {
+ 'feed_path': 'just_hd',
+ 'title': 'Just HD',
+ },
+ 'mostpopular': {
+ 'feed_path': 'most_pop',
+ 'title': 'Most Popular',
+ },
+ 'moviestudios': {
+ 'feed_path': 'studios',
+ 'title': 'Movie Studios',
+ },
+ }
+ _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)
+ _TESTS = [{
+ 'url': 'http://trailers.apple.com/#section=justadded',
+ 'info_dict': {
+ 'title': 'Just Added',
+ 'id': 'justadded',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=exclusive',
+ 'info_dict': {
+ 'title': 'Exclusive',
+ 'id': 'exclusive',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=justhd',
+ 'info_dict': {
+ 'title': 'Just HD',
+ 'id': 'justhd',
+ },
+ 'playlist_mincount': 80,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=mostpopular',
+ 'info_dict': {
+ 'title': 'Most Popular',
+ 'id': 'mostpopular',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'http://trailers.apple.com/#section=moviestudios',
+ 'info_dict': {
+ 'title': 'Movie Studios',
+ 'id': 'moviestudios',
+ },
+ 'playlist_mincount': 80,
+ }]
+
+ def _real_extract(self, url):
+ section = self._match_id(url)
+ section_data = self._download_json(
+ 'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
+ section)
+ entries = [
+ self.url_result('http://trailers.apple.com' + e['location'])
+ for e in section_data]
+ return self.playlist_result(entries, section, self._SECTIONS[section]['title'])
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dlc/extractor/archiveorg.py
index c79c58e82..c79c58e82 100644
--- a/youtube_dl/extractor/archiveorg.py
+++ b/youtube_dlc/extractor/archiveorg.py
diff --git a/youtube_dlc/extractor/ard.py b/youtube_dlc/extractor/ard.py
new file mode 100644
index 000000000..5b7b2dd6d
--- /dev/null
+++ b/youtube_dlc/extractor/ard.py
@@ -0,0 +1,422 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .generic import GenericIE
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ qualities,
+ str_or_none,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ xpath_text,
+)
+from ..compat import compat_etree_fromstring
+
+
+class ARDMediathekBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['DE']
+
+ def _extract_media_info(self, media_info_url, webpage, video_id):
+ media_info = self._download_json(
+ media_info_url, video_id, 'Downloading media JSON')
+ return self._parse_media_info(media_info, video_id, '"fsk"' in webpage)
+
+ def _parse_media_info(self, media_info, video_id, fsk):
+ formats = self._extract_formats(media_info, video_id)
+
+ if not formats:
+ if fsk:
+ raise ExtractorError(
+ 'This video is only available after 20:00', expected=True)
+ elif media_info.get('_geoblocked'):
+ self.raise_geo_restricted(
+ 'This video is not available due to geoblocking',
+ countries=self._GEO_COUNTRIES)
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitle_url = media_info.get('_subtitleUrl')
+ if subtitle_url:
+ subtitles['de'] = [{
+ 'ext': 'ttml',
+ 'url': subtitle_url,
+ }]
+
+ return {
+ 'id': video_id,
+ 'duration': int_or_none(media_info.get('_duration')),
+ 'thumbnail': media_info.get('_previewImage'),
+ 'is_live': media_info.get('_isLive') is True,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _extract_formats(self, media_info, video_id):
+ type_ = media_info.get('_type')
+ media_array = media_info.get('_mediaArray', [])
+ formats = []
+ for num, media in enumerate(media_array):
+ for stream in media.get('_mediaStreamArray', []):
+ stream_urls = stream.get('_stream')
+ if not stream_urls:
+ continue
+ if not isinstance(stream_urls, list):
+ stream_urls = [stream_urls]
+ quality = stream.get('_quality')
+ server = stream.get('_server')
+ for stream_url in stream_urls:
+ if not url_or_none(stream_url):
+ continue
+ ext = determine_ext(stream_url)
+ if quality != 'auto' and ext in ('f4m', 'm3u8'):
+ continue
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(stream_url, {
+ 'hdcore': '3.1.1',
+ 'plugin': 'aasp-3.1.1.69.124'
+ }), video_id, f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ if server and server.startswith('rtmp'):
+ f = {
+ 'url': server,
+ 'play_path': stream_url,
+ 'format_id': 'a%s-rtmp-%s' % (num, quality),
+ }
+ else:
+ f = {
+ 'url': stream_url,
+ 'format_id': 'a%s-%s-%s' % (num, ext, quality)
+ }
+ m = re.search(
+ r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$',
+ stream_url)
+ if m:
+ f.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ if type_ == 'audio':
+ f['vcodec'] = 'none'
+ formats.append(f)
+ return formats
+
+
+class ARDMediathekIE(ARDMediathekBaseIE):
+ IE_NAME = 'ARD:mediathek'
+ _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?'
+
+ _TESTS = [{
+ # available till 26.07.2022
+ 'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822',
+ 'info_dict': {
+ 'id': '44726822',
+ 'ext': 'mp4',
+ 'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?',
+ 'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5',
+ 'duration': 1740,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872',
+ 'only_matching': True,
+ }, {
+ # audio
+ 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht',
+ 'only_matching': True,
+ }, {
+ # audio
+ 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ # determine video id from url
+ m = re.match(self._VALID_URL, url)
+
+ document_id = None
+
+ numid = re.search(r'documentId=([0-9]+)', url)
+ if numid:
+ document_id = video_id = numid.group(1)
+ else:
+ video_id = m.group('video_id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ ERRORS = (
+ ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
+ ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
+ 'Video %s is no longer available'),
+ )
+
+ for pattern, message in ERRORS:
+ if pattern in webpage:
+ raise ExtractorError(message % video_id, expected=True)
+
+ if re.search(r'[\?&]rss($|[=&])', url):
+ doc = compat_etree_fromstring(webpage.encode('utf-8'))
+ if doc.tag == 'rss':
+ return GenericIE()._extract_rss(url, video_id, doc)
+
+ title = self._html_search_regex(
+ [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
+ r'<meta name="dcterms\.title" content="(.*?)"/>',
+ r'<h4 class="headline">(.*?)</h4>',
+ r'<title[^>]*>(.*?)</title>'],
+ webpage, 'title')
+ description = self._html_search_meta(
+ 'dcterms.abstract', webpage, 'description', default=None)
+ if description is None:
+ description = self._html_search_meta(
+ 'description', webpage, 'meta description', default=None)
+ if description is None:
+ description = self._html_search_regex(
+ r'<p\s+class="teasertext">(.+?)</p>',
+ webpage, 'teaser text', default=None)
+
+ # Thumbnail is sometimes not present.
+ # It is in the mobile version, but that seems to use a different URL
+ # structure altogether.
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ media_streams = re.findall(r'''(?x)
+ mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
+ "([^"]+)"''', webpage)
+
+ if media_streams:
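+ # The stream filename carries a quality tag; rank lo < hi < hq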
+ QUALITIES = qualities(['lo', 'hi', 'hq'])
+ formats = []
+ for furl in set(media_streams):
+ if furl.endswith('.f4m'):
+ fid = 'f4m'
+ else:
+ fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
+ fid = fid_m.group(1) if fid_m else None
+ formats.append({
+ 'quality': QUALITIES(fid),
+ 'format_id': fid,
+ 'url': furl,
+ })
+ self._sort_formats(formats)
+ info = {
+ 'formats': formats,
+ }
+ else: # request JSON file
+ if not document_id:
+ video_id = self._search_regex(
+ r'/play/(?:config|media)/(\d+)', webpage, 'media id')
+ info = self._extract_media_info(
+ 'http://www.ardmediathek.de/play/media/%s' % video_id,
+ webpage, video_id)
+
+ info.update({
+ 'id': video_id,
+ 'title': self._live_title(title) if info.get('is_live') else title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ })
+
+ return info
+
+
+class ARDIE(InfoExtractor):
+ _VALID_URL = r'(?P<mainurl>https?://(www\.)?daserste\.de/[^?#]+/videos(?:extern)?/(?P<display_id>[^/?#]+)-(?P<id>[0-9]+))\.html'
+ _TESTS = [{
+ # available till 14.02.2019
+ 'url': 'http://www.daserste.de/information/talk/maischberger/videos/das-groko-drama-zerlegen-sich-die-volksparteien-video-102.html',
+ 'md5': '8e4ec85f31be7c7fc08a26cdbc5a1f49',
+ 'info_dict': {
+ 'display_id': 'das-groko-drama-zerlegen-sich-die-volksparteien-video',
+ 'id': '102',
+ 'ext': 'mp4',
+ 'duration': 4435.0,
+ 'title': 'Das GroKo-Drama: Zerlegen sich die Volksparteien?',
+ 'upload_date': '20180214',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ }, {
+ 'url': 'https://www.daserste.de/information/reportage-dokumentation/erlebnis-erde/videosextern/woelfe-und-herdenschutzhunde-ungleiche-brueder-102.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('display_id')
+
+ player_url = mobj.group('mainurl') + '~playerXml.xml'
+ doc = self._download_xml(player_url, display_id)
+ video_node = doc.find('./video')
+ upload_date = unified_strdate(xpath_text(
+ video_node, './broadcastDate'))
+ thumbnail = xpath_text(video_node, './/teaserImage//variant/url')
+
+ formats = []
+ for a in video_node.findall('.//asset'):
+ f = {
+ 'format_id': a.attrib['type'],
+ 'width': int_or_none(a.find('./frameWidth').text),
+ 'height': int_or_none(a.find('./frameHeight').text),
+ 'vbr': int_or_none(a.find('./bitrateVideo').text),
+ 'abr': int_or_none(a.find('./bitrateAudio').text),
+ 'vcodec': a.find('./codecVideo').text,
+ 'tbr': int_or_none(a.find('./totalBitrate').text),
+ }
+ if a.find('./serverPrefix').text:
+ f['url'] = a.find('./serverPrefix').text
+ f['playpath'] = a.find('./fileName').text
+ else:
+ f['url'] = a.find('./fileName').text
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': mobj.group('id'),
+ 'formats': formats,
+ 'display_id': display_id,
+ 'title': video_node.find('./title').text,
+ 'duration': parse_duration(video_node.find('./duration').text),
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
+ }
+
+
+class ARDBetaMediathekIE(ARDMediathekBaseIE):
+ _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'https://ardmediathek.de/ard/video/die-robuste-roswita/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
+ 'md5': 'dfdc87d2e7e09d073d5a80770a9ce88f',
+ 'info_dict': {
+ 'display_id': 'die-robuste-roswita',
+ 'id': '70153354',
+ 'title': 'Die robuste Roswita',
+ 'description': r're:^Der Mord.*trüber ist als die Ilm.',
+ 'duration': 5316,
+ 'thumbnail': 'https://img.ardmediathek.de/standard/00/70/15/33/90/-1852531467/16x9/960?mandant=ard',
+ 'timestamp': 1577047500,
+ 'upload_date': '20191222',
+ 'ext': 'mp4',
+ },
+ }, {
+ 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('video_id')
+ display_id = mobj.group('display_id')
+ if display_id:
+ display_id = display_id.rstrip('/')
+ if not display_id:
+ display_id = video_id
+
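+ # The public-gateway endpoint speaks GraphQL: POST a query for the
+ # player page keyed by client and clip id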
+ player_page = self._download_json(
+ 'https://api.ardmediathek.de/public-gateway',
+ display_id, data=json.dumps({
+ 'query': '''{
+ playerPage(client:"%s", clipId: "%s") {
+ blockedByFsk
+ broadcastedOn
+ maturityContentRating
+ mediaCollection {
+ _duration
+ _geoblocked
+ _isLive
+ _mediaArray {
+ _mediaStreamArray {
+ _quality
+ _server
+ _stream
+ }
+ }
+ _previewImage
+ _subtitleUrl
+ _type
+ }
+ show {
+ title
+ }
+ synopsis
+ title
+ tracking {
+ atiCustomVars {
+ contentId
+ }
+ }
+ }
+}''' % (mobj.group('client'), video_id),
+ }).encode(), headers={
+ 'Content-Type': 'application/json'
+ })['data']['playerPage']
+ title = player_page['title']
+ content_id = str_or_none(try_get(
+ player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))
+ media_collection = player_page.get('mediaCollection') or {}
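+        # The GraphQL response may lack a media collection; in that case fall
+        # back to the legacy play/media endpoint keyed by the tracking content id.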
+ if not media_collection and content_id:
+ media_collection = self._download_json(
+ 'https://www.ardmediathek.de/play/media/' + content_id,
+ content_id, fatal=False) or {}
+ info = self._parse_media_info(
+ media_collection, content_id or video_id,
+ player_page.get('blockedByFsk'))
+ age_limit = None
+ description = player_page.get('synopsis')
+ maturity_content_rating = player_page.get('maturityContentRating')
+ if maturity_content_rating:
+ age_limit = int_or_none(maturity_content_rating.lstrip('FSK'))
+ if not age_limit and description:
+ age_limit = int_or_none(self._search_regex(
+ r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
+ info.update({
+ 'age_limit': age_limit,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
+ 'series': try_get(player_page, lambda x: x['show']['title']),
+ })
+ return info
diff --git a/youtube_dl/extractor/arkena.py b/youtube_dlc/extractor/arkena.py
index 854f58767..854f58767 100644
--- a/youtube_dl/extractor/arkena.py
+++ b/youtube_dlc/extractor/arkena.py
diff --git a/youtube_dl/extractor/arte.py b/youtube_dlc/extractor/arte.py
index 2bd3bfe8a..2bd3bfe8a 100644
--- a/youtube_dl/extractor/arte.py
+++ b/youtube_dlc/extractor/arte.py
diff --git a/youtube_dl/extractor/asiancrush.py b/youtube_dlc/extractor/asiancrush.py
index 0348e680c..0348e680c 100644
--- a/youtube_dl/extractor/asiancrush.py
+++ b/youtube_dlc/extractor/asiancrush.py
diff --git a/youtube_dlc/extractor/atresplayer.py b/youtube_dlc/extractor/atresplayer.py
new file mode 100644
index 000000000..c2cec9845
--- /dev/null
+++ b/youtube_dlc/extractor/atresplayer.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urlencode_postdata,
+)
+
+
+class AtresPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?atresplayer\.com/[^/]+/[^/]+/[^/]+/[^/]+/(?P<display_id>.+?)_(?P<id>[0-9a-f]{24})'
+ _NETRC_MACHINE = 'atresplayer'
+ _TESTS = [
+ {
+ 'url': 'https://www.atresplayer.com/antena3/series/pequenas-coincidencias/temporada-1/capitulo-7-asuntos-pendientes_5d4aa2c57ed1a88fc715a615/',
+ 'info_dict': {
+ 'id': '5d4aa2c57ed1a88fc715a615',
+ 'ext': 'mp4',
+ 'title': 'Capítulo 7: Asuntos pendientes',
+ 'description': 'md5:7634cdcb4d50d5381bedf93efb537fbc',
+ 'duration': 3413,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ 'skip': 'This video is only available for registered users'
+ },
+ {
+ 'url': 'https://www.atresplayer.com/lasexta/programas/el-club-de-la-comedia/temporada-4/capitulo-10-especial-solidario-nochebuena_5ad08edf986b2855ed47adc4/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.atresplayer.com/antena3/series/el-secreto-de-puente-viejo/el-chico-de-los-tres-lunares/capitulo-977-29-12-14_5ad51046986b2886722ccdea/',
+ 'only_matching': True,
+ },
+ ]
+ _API_BASE = 'https://api.atresplayer.com/'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _handle_error(self, e, code):
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == code:
+ error = self._parse_json(e.cause.read(), None)
+ if error.get('error') == 'required_registered':
+ self.raise_login_required()
+ raise ExtractorError(error['error_description'], expected=True)
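+        # Bare raise re-raises the ExtractorError currently being handled in
+        # the caller's except block.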
+ raise
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
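+        # Request the API login endpoint first (presumably to obtain session
+        # cookies) before posting the credentials.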
+ self._request_webpage(
+ self._API_BASE + 'login', None, 'Downloading login page')
+
+ try:
+ target_url = self._download_json(
+ 'https://account.atresmedia.com/api/login', None,
+ 'Logging in', headers={
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ }, data=urlencode_postdata({
+ 'username': username,
+ 'password': password,
+ }))['targetUrl']
+ except ExtractorError as e:
+ self._handle_error(e, 400)
+
+ self._request_webpage(target_url, None, 'Following Target URL')
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+
+ try:
+ episode = self._download_json(
+ self._API_BASE + 'client/v1/player/episode/' + video_id, video_id)
+ except ExtractorError as e:
+ self._handle_error(e, 403)
+
+ title = episode['titulo']
+
+ formats = []
+ for source in episode.get('sources', []):
+ src = source.get('src')
+ if not src:
+ continue
+ src_type = source.get('type')
+ if src_type == 'application/vnd.apple.mpegurl':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif src_type == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ heartbeat = episode.get('heartbeat') or {}
+ omniture = episode.get('omniture') or {}
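+        # Prefer heartbeat metadata and fall back to omniture for each field.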
+ get_meta = lambda x: heartbeat.get(x) or omniture.get(x)
+
+ return {
+ 'display_id': display_id,
+ 'id': video_id,
+ 'title': title,
+ 'description': episode.get('descripcion'),
+ 'thumbnail': episode.get('imgPoster'),
+ 'duration': int_or_none(episode.get('duration')),
+ 'formats': formats,
+ 'channel': get_meta('channel'),
+ 'season': get_meta('season'),
+ 'episode_number': int_or_none(get_meta('episodeNumber')),
+ }
diff --git a/youtube_dl/extractor/atttechchannel.py b/youtube_dlc/extractor/atttechchannel.py
index 8f93fb353..8f93fb353 100644
--- a/youtube_dl/extractor/atttechchannel.py
+++ b/youtube_dlc/extractor/atttechchannel.py
diff --git a/youtube_dl/extractor/atvat.py b/youtube_dlc/extractor/atvat.py
index 95e572d70..95e572d70 100644
--- a/youtube_dl/extractor/atvat.py
+++ b/youtube_dlc/extractor/atvat.py
diff --git a/youtube_dl/extractor/audimedia.py b/youtube_dlc/extractor/audimedia.py
index 6bd48ef15..6bd48ef15 100644
--- a/youtube_dl/extractor/audimedia.py
+++ b/youtube_dlc/extractor/audimedia.py
diff --git a/youtube_dlc/extractor/audioboom.py b/youtube_dlc/extractor/audioboom.py
new file mode 100644
index 000000000..c51837b40
--- /dev/null
+++ b/youtube_dlc/extractor/audioboom.py
@@ -0,0 +1,73 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ float_or_none,
+)
+
+
+class AudioBoomIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?audioboom\.com/(?:boos|posts)/(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'https://audioboom.com/posts/7398103-asim-chaudhry',
+ 'md5': '7b00192e593ff227e6a315486979a42d',
+ 'info_dict': {
+ 'id': '7398103',
+ 'ext': 'mp3',
+ 'title': 'Asim Chaudhry',
+ 'description': 'md5:2f3fef17dacc2595b5362e1d7d3602fc',
+ 'duration': 4000.99,
+ 'uploader': 'Sue Perkins: An hour or so with...',
+ 'uploader_url': r're:https?://(?:www\.)?audioboom\.com/channel/perkins',
+ }
+ }, {
+ 'url': 'https://audioboom.com/posts/4279833-3-09-2016-czaban-hour-3?t=0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ clip = None
+
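+        # Clip metadata is embedded as JSON in a data-new-clip-store attribute;
+        # each field read from it falls back to meta tags below.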
+ clip_store = self._parse_json(
+ self._html_search_regex(
+ r'data-new-clip-store=(["\'])(?P<json>{.+?})\1',
+ webpage, 'clip store', default='{}', group='json'),
+ video_id, fatal=False)
+ if clip_store:
+ clips = clip_store.get('clips')
+ if clips and isinstance(clips, list) and isinstance(clips[0], dict):
+ clip = clips[0]
+
+ def from_clip(field):
+ if clip:
+ return clip.get(field)
+
+ audio_url = from_clip('clipURLPriorToLoading') or self._og_search_property(
+ 'audio', webpage, 'audio url')
+ title = from_clip('title') or self._html_search_meta(
+ ['og:title', 'og:audio:title', 'audio_title'], webpage)
+ description = from_clip('description') or clean_html(from_clip('formattedDescription')) or self._og_search_description(webpage)
+
+ duration = float_or_none(from_clip('duration') or self._html_search_meta(
+ 'weibo:audio:duration', webpage))
+
+ uploader = from_clip('author') or self._html_search_meta(
+ ['og:audio:artist', 'twitter:audio:artist_name', 'audio_artist'], webpage, 'uploader')
+ uploader_url = from_clip('author_url') or self._html_search_meta(
+ 'audioboo:channel', webpage, 'uploader url')
+
+ return {
+ 'id': video_id,
+ 'url': audio_url,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_url': uploader_url,
+ }
diff --git a/youtube_dl/extractor/audiomack.py b/youtube_dlc/extractor/audiomack.py
index cc7771354..cc7771354 100644
--- a/youtube_dl/extractor/audiomack.py
+++ b/youtube_dlc/extractor/audiomack.py
diff --git a/youtube_dl/extractor/awaan.py b/youtube_dlc/extractor/awaan.py
index a2603bbff..a2603bbff 100644
--- a/youtube_dl/extractor/awaan.py
+++ b/youtube_dlc/extractor/awaan.py
diff --git a/youtube_dl/extractor/aws.py b/youtube_dlc/extractor/aws.py
index dccfeaf73..dccfeaf73 100644
--- a/youtube_dl/extractor/aws.py
+++ b/youtube_dlc/extractor/aws.py
diff --git a/youtube_dlc/extractor/azmedien.py b/youtube_dlc/extractor/azmedien.py
new file mode 100644
index 000000000..b1e20def5
--- /dev/null
+++ b/youtube_dlc/extractor/azmedien.py
@@ -0,0 +1,66 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .kaltura import KalturaIE
+
+
+class AZMedienIE(InfoExtractor):
+ IE_DESC = 'AZ Medien videos'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?P<host>
+ telezueri\.ch|
+ telebaern\.tv|
+ telem1\.ch
+ )/
+ [^/]+/
+ (?P<id>
+ [^/]+-(?P<article_id>\d+)
+ )
+ (?:
+ \#video=
+ (?P<kaltura_id>
+ [_0-9a-z]+
+ )
+ )?
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.telezueri.ch/sonntalk/bundesrats-vakanzen-eu-rahmenabkommen-133214569',
+ 'info_dict': {
+ 'id': '1_anruz3wy',
+ 'ext': 'mp4',
+ 'title': 'Bundesrats-Vakanzen / EU-Rahmenabkommen',
+ 'uploader_id': 'TVOnline',
+ 'upload_date': '20180930',
+ 'timestamp': 1538328802,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.telebaern.tv/telebaern-news/montag-1-oktober-2018-ganze-sendung-133531189#video=0_7xjo9lf1',
+ 'only_matching': True
+ }]
+ _API_TEMPL = 'https://www.%s/api/pub/gql/%s/NewsArticleTeaser/cb9f2f81ed22e9b47f4ca64ea3cc5a5d13e88d1d'
+ _PARTNER_ID = '1719221'
+
+ def _real_extract(self, url):
+ host, display_id, article_id, entry_id = re.match(self._VALID_URL, url).groups()
+
+ if not entry_id:
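+            # No #video= fragment in the URL: resolve the Kaltura entry id via
+            # the site's GraphQL endpoint (the hash in _API_TEMPL looks like a
+            # persisted-query id).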
+ entry_id = self._download_json(
+ self._API_TEMPL % (host, host.split('.')[0]), display_id, query={
+ 'variables': json.dumps({
+ 'contextId': 'NewsArticle:' + article_id,
+ }),
+ })['data']['context']['mainAsset']['video']['kaltura']['kalturaId']
+
+ return self.url_result(
+ 'kaltura:%s:%s' % (self._PARTNER_ID, entry_id),
+ ie=KalturaIE.ie_key(), video_id=entry_id)
diff --git a/youtube_dl/extractor/baidu.py b/youtube_dlc/extractor/baidu.py
index 234a661d3..234a661d3 100644
--- a/youtube_dl/extractor/baidu.py
+++ b/youtube_dlc/extractor/baidu.py
diff --git a/youtube_dlc/extractor/bandcamp.py b/youtube_dlc/extractor/bandcamp.py
new file mode 100644
index 000000000..b8a57e6a5
--- /dev/null
+++ b/youtube_dlc/extractor/bandcamp.py
@@ -0,0 +1,417 @@
+from __future__ import unicode_literals
+
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ KNOWN_EXTENSIONS,
+ parse_filesize,
+ str_or_none,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+ unified_strdate,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class BandcampIE(InfoExtractor):
+ _VALID_URL = r'https?://[^/]+\.bandcamp\.com/track/(?P<title>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://youtube-dlc.bandcamp.com/track/youtube-dlc-test-song',
+ 'md5': 'c557841d5e50261777a6585648adf439',
+ 'info_dict': {
+ 'id': '1812978515',
+ 'ext': 'mp3',
+ 'title': "youtube-dlc \"'/\\\u00e4\u21ad - youtube-dlc test song \"'/\\\u00e4\u21ad",
+ 'duration': 9.8485,
+ },
+ '_skip': 'There is a limit of 200 free downloads / month for the test song'
+ }, {
+ # free download
+ 'url': 'http://benprunty.bandcamp.com/track/lanius-battle',
+ 'md5': '853e35bf34aa1d6fe2615ae612564b36',
+ 'info_dict': {
+ 'id': '2650410135',
+ 'ext': 'aiff',
+ 'title': 'Ben Prunty - Lanius (Battle)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Ben Prunty',
+ 'timestamp': 1396508491,
+ 'upload_date': '20140403',
+ 'release_date': '20140403',
+ 'duration': 260.877,
+ 'track': 'Lanius (Battle)',
+ 'track_number': 1,
+ 'track_id': '2650410135',
+ 'artist': 'Ben Prunty',
+ 'album': 'FTL: Advanced Edition Soundtrack',
+ },
+ }, {
+ # no free download, mp3 128
+ 'url': 'https://relapsealumni.bandcamp.com/track/hail-to-fire',
+ 'md5': 'fec12ff55e804bb7f7ebeb77a800c8b7',
+ 'info_dict': {
+ 'id': '2584466013',
+ 'ext': 'mp3',
+ 'title': 'Mastodon - Hail to Fire',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Mastodon',
+ 'timestamp': 1322005399,
+ 'upload_date': '20111122',
+ 'release_date': '20040207',
+ 'duration': 120.79,
+ 'track': 'Hail to Fire',
+ 'track_number': 5,
+ 'track_id': '2584466013',
+ 'artist': 'Mastodon',
+ 'album': 'Call of the Mastodon',
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ title = mobj.group('title')
+ webpage = self._download_webpage(url, title)
+ thumbnail = self._html_search_meta('og:image', webpage, default=None)
+
+ track_id = None
+ track = None
+ track_number = None
+ duration = None
+
+ formats = []
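+        # The page embeds a trackinfo JSON object whose file dict maps format
+        # ids of the form "<ext>-<abr>" to stream URLs.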
+ track_info = self._parse_json(
+ self._search_regex(
+ r'trackinfo\s*:\s*\[\s*({.+?})\s*\]\s*,\s*?\n',
+ webpage, 'track info', default='{}'), title)
+ if track_info:
+ file_ = track_info.get('file')
+ if isinstance(file_, dict):
+ for format_id, format_url in file_.items():
+ if not url_or_none(format_url):
+ continue
+ ext, abr_str = format_id.split('-', 1)
+ formats.append({
+ 'format_id': format_id,
+ 'url': self._proto_relative_url(format_url, 'http:'),
+ 'ext': ext,
+ 'vcodec': 'none',
+ 'acodec': ext,
+ 'abr': int_or_none(abr_str),
+ })
+ track = track_info.get('title')
+ track_id = str_or_none(track_info.get('track_id') or track_info.get('id'))
+ track_number = int_or_none(track_info.get('track_num'))
+ duration = float_or_none(track_info.get('duration'))
+
+ def extract(key):
+ return self._search_regex(
+ r'\b%s\s*["\']?\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % key,
+ webpage, key, default=None, group='value')
+
+ artist = extract('artist')
+ album = extract('album_title')
+ timestamp = unified_timestamp(
+ extract('publish_date') or extract('album_publish_date'))
+ release_date = unified_strdate(extract('album_release_date'))
+
+ download_link = self._search_regex(
+ r'freeDownloadPage\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'download link', default=None, group='url')
+ if download_link:
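+            # The free-download page embeds a data-blob JSON describing the
+            # downloadable formats for each item.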
+ track_id = self._search_regex(
+ r'(?ms)var TralbumData = .*?[{,]\s*id: (?P<id>\d+),?$',
+ webpage, 'track id')
+
+ download_webpage = self._download_webpage(
+ download_link, track_id, 'Downloading free downloads page')
+
+ blob = self._parse_json(
+ self._search_regex(
+ r'data-blob=(["\'])(?P<blob>{.+?})\1', download_webpage,
+ 'blob', group='blob'),
+ track_id, transform_source=unescapeHTML)
+
+ info = try_get(
+ blob, (lambda x: x['digital_items'][0],
+ lambda x: x['download_items'][0]), dict)
+ if info:
+ downloads = info.get('downloads')
+ if isinstance(downloads, dict):
+ if not track:
+ track = info.get('title')
+ if not artist:
+ artist = info.get('artist')
+ if not thumbnail:
+ thumbnail = info.get('thumb_url')
+
+ download_formats = {}
+ download_formats_list = blob.get('download_formats')
+ if isinstance(download_formats_list, list):
+ for f in blob['download_formats']:
+ name, ext = f.get('name'), f.get('file_extension')
+ if all(isinstance(x, compat_str) for x in (name, ext)):
+ download_formats[name] = ext.strip('.')
+
+ for format_id, f in downloads.items():
+ format_url = f.get('url')
+ if not format_url:
+ continue
+ # Stat URL generation algorithm is reverse engineered from
+ # download_*_bundle_*.js
+ stat_url = update_url_query(
+ format_url.replace('/download/', '/statdownload/'), {
+ '.rand': int(time.time() * 1000 * random.random()),
+ })
+ format_id = f.get('encoding_name') or format_id
+ stat = self._download_json(
+ stat_url, track_id, 'Downloading %s JSON' % format_id,
+ transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1],
+ fatal=False)
+ if not stat:
+ continue
+ retry_url = url_or_none(stat.get('retry_url'))
+ if not retry_url:
+ continue
+ formats.append({
+ 'url': self._proto_relative_url(retry_url, 'http:'),
+ 'ext': download_formats.get(format_id),
+ 'format_id': format_id,
+ 'format_note': f.get('description'),
+ 'filesize': parse_filesize(f.get('size_mb')),
+ 'vcodec': 'none',
+ })
+
+ self._sort_formats(formats)
+
+ title = '%s - %s' % (artist, track) if artist else track
+
+ if not duration:
+ duration = float_or_none(self._html_search_meta(
+ 'duration', webpage, default=None))
+
+ return {
+ 'id': track_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': artist,
+ 'timestamp': timestamp,
+ 'release_date': release_date,
+ 'duration': duration,
+ 'track': track,
+ 'track_number': track_number,
+ 'track_id': track_id,
+ 'artist': artist,
+ 'album': album,
+ 'formats': formats,
+ }
+
+
+class BandcampAlbumIE(InfoExtractor):
+ IE_NAME = 'Bandcamp:album'
+ _VALID_URL = r'https?://(?:(?P<subdomain>[^.]+)\.)?bandcamp\.com(?:/album/(?P<album_id>[^/?#&]+))?'
+
+ _TESTS = [{
+ 'url': 'http://blazo.bandcamp.com/album/jazz-format-mixtape-vol-1',
+ 'playlist': [
+ {
+ 'md5': '39bc1eded3476e927c724321ddf116cf',
+ 'info_dict': {
+ 'id': '1353101989',
+ 'ext': 'mp3',
+ 'title': 'Intro',
+ }
+ },
+ {
+ 'md5': '1a2c32e2691474643e912cc6cd4bffaa',
+ 'info_dict': {
+ 'id': '38097443',
+ 'ext': 'mp3',
+ 'title': 'Kero One - Keep It Alive (Blazo remix)',
+ }
+ },
+ ],
+ 'info_dict': {
+ 'title': 'Jazz Format Mixtape vol.1',
+ 'id': 'jazz-format-mixtape-vol-1',
+ 'uploader_id': 'blazo',
+ },
+ 'params': {
+ 'playlistend': 2
+ },
+ 'skip': 'Bandcamp imposes download limits.'
+ }, {
+ 'url': 'http://nightbringer.bandcamp.com/album/hierophany-of-the-open-grave',
+ 'info_dict': {
+ 'title': 'Hierophany of the Open Grave',
+ 'uploader_id': 'nightbringer',
+ 'id': 'hierophany-of-the-open-grave',
+ },
+ 'playlist_mincount': 9,
+ }, {
+ 'url': 'http://dotscale.bandcamp.com',
+ 'info_dict': {
+ 'title': 'Loom',
+ 'id': 'dotscale',
+ 'uploader_id': 'dotscale',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ # with escaped quote in title
+ 'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
+ 'info_dict': {
+ 'title': '"Entropy" EP',
+ 'uploader_id': 'jstrecords',
+ 'id': 'entropy-ep',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ # not all tracks have songs
+ 'url': 'https://insulters.bandcamp.com/album/we-are-the-plague',
+ 'info_dict': {
+ 'id': 'we-are-the-plague',
+ 'title': 'WE ARE THE PLAGUE',
+ 'uploader_id': 'insulters',
+ },
+ 'playlist_count': 2,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if BandcampWeeklyIE.suitable(url) or BandcampIE.suitable(url)
+ else super(BandcampAlbumIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ uploader_id = mobj.group('subdomain')
+ album_id = mobj.group('album_id')
+ playlist_id = album_id or uploader_id
+ webpage = self._download_webpage(url, playlist_id)
+ track_elements = re.findall(
+ r'(?s)<div[^>]*>(.*?<a[^>]+href="([^"]+?)"[^>]+itemprop="url"[^>]*>.*?)</div>', webpage)
+ if not track_elements:
+ raise ExtractorError('The page doesn\'t contain any tracks')
+ # Only tracks with duration info have songs
+ entries = [
+ self.url_result(
+ compat_urlparse.urljoin(url, t_path),
+ ie=BandcampIE.ie_key(),
+ video_title=self._search_regex(
+ r'<span\b[^>]+\bitemprop=["\']name["\'][^>]*>([^<]+)',
+ elem_content, 'track title', fatal=False))
+ for elem_content, t_path in track_elements
+ if self._html_search_meta('duration', elem_content, default=None)]
+
+ title = self._html_search_regex(
+ r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
+ webpage, 'title', fatal=False)
+ if title:
+ title = title.replace(r'\"', '"')
+ return {
+ '_type': 'playlist',
+ 'uploader_id': uploader_id,
+ 'id': playlist_id,
+ 'title': title,
+ 'entries': entries,
+ }
+
+
+class BandcampWeeklyIE(InfoExtractor):
+ IE_NAME = 'Bandcamp:weekly'
+ _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://bandcamp.com/?show=224',
+ 'md5': 'b00df799c733cf7e0c567ed187dea0fd',
+ 'info_dict': {
+ 'id': '224',
+ 'ext': 'opus',
+ 'title': 'BC Weekly April 4th 2017 - Magic Moments',
+ 'description': 'md5:5d48150916e8e02d030623a48512c874',
+ 'duration': 5829.77,
+ 'release_date': '20170404',
+ 'series': 'Bandcamp Weekly',
+ 'episode': 'Magic Moments',
+ 'episode_number': 208,
+ 'episode_id': '224',
+ }
+ }, {
+ 'url': 'https://bandcamp.com/?blah/blah@&show=228',
+ 'only_matching': True
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ blob = self._parse_json(
+ self._search_regex(
+ r'data-blob=(["\'])(?P<blob>{.+?})\1', webpage,
+ 'blob', group='blob'),
+ video_id, transform_source=unescapeHTML)
+
+ show = blob['bcw_show']
+
+ # This is desired because any invalid show id redirects to `bandcamp.com`
+ # which happens to expose the latest Bandcamp Weekly episode.
+ show_id = int_or_none(show.get('show_id')) or int_or_none(video_id)
+
+ formats = []
+ for format_id, format_url in show['audio_stream'].items():
+ if not url_or_none(format_url):
+ continue
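+            # for/else: ext stays None when no known extension substring
+            # appears in the format id.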
+ for known_ext in KNOWN_EXTENSIONS:
+ if known_ext in format_id:
+ ext = known_ext
+ break
+ else:
+ ext = None
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'ext': ext,
+ 'vcodec': 'none',
+ })
+ self._sort_formats(formats)
+
+ title = show.get('audio_title') or 'Bandcamp Weekly'
+ subtitle = show.get('subtitle')
+ if subtitle:
+ title += ' - %s' % subtitle
+
+ episode_number = None
+ seq = blob.get('bcw_seq')
+
+ if seq and isinstance(seq, list):
+ try:
+ episode_number = next(
+ int_or_none(e.get('episode_number'))
+ for e in seq
+ if isinstance(e, dict) and int_or_none(e.get('id')) == show_id)
+ except StopIteration:
+ pass
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': show.get('desc') or show.get('short_desc'),
+ 'duration': float_or_none(show.get('audio_duration')),
+ 'is_live': False,
+ 'release_date': unified_strdate(show.get('published_date')),
+ 'series': 'Bandcamp Weekly',
+ 'episode': show.get('subtitle'),
+ 'episode_number': episode_number,
+ 'episode_id': compat_str(video_id),
+ 'formats': formats
+ }
diff --git a/youtube_dlc/extractor/bbc.py b/youtube_dlc/extractor/bbc.py
new file mode 100644
index 000000000..002c39c39
--- /dev/null
+++ b/youtube_dlc/extractor/bbc.py
@@ -0,0 +1,1359 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ get_element_by_class,
+ int_or_none,
+ js_to_json,
+ parse_duration,
+ parse_iso8601,
+ try_get,
+ unescapeHTML,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+from ..compat import (
+ compat_etree_Element,
+ compat_HTTPError,
+ compat_urlparse,
+)
+
+
+class BBCCoUkIE(InfoExtractor):
+ IE_NAME = 'bbc.co.uk'
+ IE_DESC = 'BBC iPlayer'
+ _ID_REGEX = r'(?:[pbm][\da-z]{7}|w[\da-z]{7,14})'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?bbc\.co\.uk/
+ (?:
+ programmes/(?!articles/)|
+ iplayer(?:/[^/]+)?/(?:episode/|playlist/)|
+ music/(?:clips|audiovideo/popular)[/#]|
+ radio/player/|
+ sounds/play/|
+ events/[^/]+/play/[^/]+/
+ )
+ (?P<id>%s)(?!/(?:episodes|broadcasts|clips))
+ ''' % _ID_REGEX
+
+ _LOGIN_URL = 'https://account.bbc.com/signin'
+ _NETRC_MACHINE = 'bbc'
+
+ _MEDIASELECTOR_URLS = [
+        # Provides HQ HLS streams with even better quality than the pc mediaset
+        # but fails with geolocation in some cases even when the programme is not
+        # geo restricted at all (e.g. http://www.bbc.co.uk/programmes/b06bp7lf).
+        # May also fail with selectionunavailable.
+ 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
+ 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
+ ]
+
+ _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
+ _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
+
+ _NAMESPACES = (
+ _MEDIASELECTION_NS,
+ _EMP_PLAYLIST_NS,
+ )
+
+ _TESTS = [
+ {
+ 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
+ 'info_dict': {
+ 'id': 'b039d07m',
+ 'ext': 'flv',
+ 'title': 'Kaleidoscope, Leonard Cohen',
+ 'description': 'The Canadian poet and songwriter reflects on his musical career.',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b00yng5w/The_Man_in_Black_Series_3_The_Printed_Name/',
+ 'info_dict': {
+ 'id': 'b00yng1d',
+ 'ext': 'flv',
+ 'title': 'The Man in Black: Series 3: The Printed Name',
+ 'description': "Mark Gatiss introduces Nicholas Pierpan's chilling tale of a writer's devilish pact with a mysterious man. Stars Ewan Bailey.",
+ 'duration': 1800,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Episode is no longer available on BBC iPlayer Radio',
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b03vhd1f/The_Voice_UK_Series_3_Blind_Auditions_5/',
+ 'info_dict': {
+ 'id': 'b00yng1d',
+ 'ext': 'flv',
+ 'title': 'The Voice UK: Series 3: Blind Auditions 5',
+ 'description': 'Emma Willis and Marvin Humes present the fifth set of blind auditions in the singing competition, as the coaches continue to build their teams based on voice alone.',
+ 'duration': 5100,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+ },
+ {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/p026c7jt/tomorrows-worlds-the-unearthly-history-of-science-fiction-2-invasion',
+ 'info_dict': {
+ 'id': 'b03k3pb7',
+ 'ext': 'flv',
+ 'title': "Tomorrow's Worlds: The Unearthly History of Science Fiction",
+ 'description': '2. Invasion',
+ 'duration': 3600,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Currently BBC iPlayer TV programmes are available to play in the UK only',
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b04v20dw',
+ 'info_dict': {
+ 'id': 'b04v209v',
+ 'ext': 'flv',
+ 'title': 'Pete Tong, The Essential New Tune Special',
+ 'description': "Pete has a very special mix - all of 2014's Essential New Tunes!",
+ 'duration': 10800,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Episode is no longer available on BBC iPlayer Radio',
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips/p022h44b',
+ 'note': 'Audio',
+ 'info_dict': {
+ 'id': 'p022h44j',
+ 'ext': 'flv',
+ 'title': 'BBC Proms Music Guides, Rachmaninov: Symphonic Dances',
+ 'description': "In this Proms Music Guide, Andrew McGregor looks at Rachmaninov's Symphonic Dances.",
+ 'duration': 227,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips/p025c0zz',
+ 'note': 'Video',
+ 'info_dict': {
+ 'id': 'p025c103',
+ 'ext': 'flv',
+ 'title': 'Reading and Leeds Festival, 2014, Rae Morris - Closer (Live on BBC Three)',
+ 'description': 'Rae Morris performs Closer for BBC Three at Reading 2014',
+ 'duration': 226,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b054fn09/ad/natural-world-20152016-2-super-powered-owls',
+ 'info_dict': {
+ 'id': 'p02n76xf',
+ 'ext': 'flv',
+ 'title': 'Natural World, 2015-2016: 2. Super Powered Owls',
+ 'description': 'md5:e4db5c937d0e95a7c6b5e654d429183d',
+ 'duration': 3540,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'geolocation',
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/episode/b05zmgwn/royal-academy-summer-exhibition',
+ 'info_dict': {
+ 'id': 'b05zmgw1',
+ 'ext': 'flv',
+ 'description': 'Kirsty Wark and Morgan Quaintance visit the Royal Academy as it prepares for its annual artistic extravaganza, meeting people who have come together to make the show unique.',
+ 'title': 'Royal Academy Summer Exhibition',
+ 'duration': 3540,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'geolocation',
+ }, {
+ # iptv-all mediaset fails with geolocation however there is no geo restriction
+ # for this programme at all
+ 'url': 'http://www.bbc.co.uk/programmes/b06rkn85',
+ 'info_dict': {
+ 'id': 'b06rkms3',
+ 'ext': 'flv',
+ 'title': "Best of the Mini-Mixes 2015: Part 3, Annie Mac's Friday Night - BBC Radio 1",
+ 'description': "Annie has part three in the Best of the Mini-Mixes 2015, plus the year's Most Played!",
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Now it\'s really geo-restricted',
+ }, {
+ # compact player (https://github.com/ytdl-org/youtube-dl/issues/8147)
+ 'url': 'http://www.bbc.co.uk/programmes/p028bfkf/player',
+ 'info_dict': {
+ 'id': 'p028bfkj',
+ 'ext': 'flv',
+ 'title': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
+ 'description': 'Extract from BBC documentary Look Stranger - Giant Leeks and Magic Brews',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.bbc.co.uk/sounds/play/m0007jzb',
+ 'note': 'Audio',
+ 'info_dict': {
+ 'id': 'm0007jz9',
+ 'ext': 'mp4',
+ 'title': 'BBC Proms, 2019, Prom 34: West–Eastern Divan Orchestra',
+ 'description': "Live BBC Proms. West–Eastern Divan Orchestra with Daniel Barenboim and Martha Argerich.",
+ 'duration': 9840,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/playlist/p01dvks4',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/music/clips#p02frcc3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/iplayer/cbeebies/episode/b0480276/bing-14-atchoo',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/radio/player/p03cchwf',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/music/audiovideo/popular#p055bc55',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/w3csv1y9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/programmes/m00005xn',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/programmes/w172w4dww1jqt5s',
+ 'only_matching': True,
+ }]
+
+ _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading signin page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'username': username,
+ 'password': password,
+ })
+
+ post_url = urljoin(self._LOGIN_URL, self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page,
+ 'post url', default=self._LOGIN_URL, group='url'))
+
+ response, urlh = self._download_webpage_handle(
+ post_url, None, 'Logging in', data=urlencode_postdata(login_form),
+ headers={'Referer': self._LOGIN_URL})
+
+ if self._LOGIN_URL in urlh.geturl():
+ error = clean_html(get_element_by_class('form-message', response))
+ if error:
+ raise ExtractorError(
+ 'Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ self._login()
+
+ class MediaSelectionError(Exception):
+ def __init__(self, id):
+ self.id = id
+
+ def _extract_asx_playlist(self, connection, programme_id):
+ asx = self._download_xml(connection.get('href'), programme_id, 'Downloading ASX playlist')
+ return [ref.get('href') for ref in asx.findall('./Entry/ref')]
+
+ def _extract_items(self, playlist):
+ return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)
+
+ def _findall_ns(self, element, xpath):
+ elements = []
+ for ns in self._NAMESPACES:
+ elements.extend(element.findall(xpath % ns))
+ return elements
+
+ def _extract_medias(self, media_selection):
+ error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
+ if error is None:
+            error = media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
+ if error is not None:
+ raise BBCCoUkIE.MediaSelectionError(error.get('id'))
+ return self._findall_ns(media_selection, './{%s}media')
+
+ def _extract_connections(self, media):
+ return self._findall_ns(media, './{%s}connection')
+
+ def _get_subtitles(self, media, programme_id):
+ subtitles = {}
+ for connection in self._extract_connections(media):
+ cc_url = url_or_none(connection.get('href'))
+ if not cc_url:
+ continue
+ captions = self._download_xml(
+ cc_url, programme_id, 'Downloading captions', fatal=False)
+ if not isinstance(captions, compat_etree_Element):
+ continue
+ lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
+ subtitles[lang] = [
+ {
+ 'url': connection.get('href'),
+ 'ext': 'ttml',
+ },
+ ]
+ return subtitles
+
+ def _raise_extractor_error(self, media_selection_error):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, media_selection_error.id),
+ expected=True)
+
+ def _download_media_selector(self, programme_id):
+ last_exception = None
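+        # Try each mediaselector endpoint in turn; geo/availability errors are
+        # remembered so the last one can be raised if every endpoint fails.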
+ for mediaselector_url in self._MEDIASELECTOR_URLS:
+ try:
+ return self._download_media_selector_url(
+ mediaselector_url % programme_id, programme_id)
+ except BBCCoUkIE.MediaSelectionError as e:
+ if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
+ last_exception = e
+ continue
+ self._raise_extractor_error(e)
+ self._raise_extractor_error(last_exception)
+
+ def _download_media_selector_url(self, url, programme_id=None):
+ media_selection = self._download_xml(
+ url, programme_id, 'Downloading media selection XML',
+ expected_status=(403, 404))
+ return self._process_media_selector(media_selection, programme_id)
+
+ def _process_media_selector(self, media_selection, programme_id):
+ formats = []
+ subtitles = None
+ urls = []
+
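+        # Track connection hrefs already seen so duplicate formats are skipped.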
+ for media in self._extract_medias(media_selection):
+ kind = media.get('kind')
+ if kind in ('video', 'audio'):
+ bitrate = int_or_none(media.get('bitrate'))
+ encoding = media.get('encoding')
+ service = media.get('service')
+ width = int_or_none(media.get('width'))
+ height = int_or_none(media.get('height'))
+ file_size = int_or_none(media.get('media_file_size'))
+ for connection in self._extract_connections(media):
+ href = connection.get('href')
+ if href in urls:
+ continue
+ if href:
+ urls.append(href)
+ conn_kind = connection.get('kind')
+ protocol = connection.get('protocol')
+ supplier = connection.get('supplier')
+ transfer_format = connection.get('transferFormat')
+ format_id = supplier or conn_kind or protocol
+ if service:
+ format_id = '%s_%s' % (service, format_id)
+ # ASX playlist
+ if supplier == 'asx':
+ for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
+ formats.append({
+ 'url': ref,
+ 'format_id': 'ref%s_%s' % (i, format_id),
+ })
+ elif transfer_format == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ href, programme_id, mpd_id=format_id, fatal=False))
+ elif transfer_format == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ href, programme_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False))
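+                        # For Unified Streaming (.ism) URLs, also probe the
+                        # rewritten .ism/.m3u8 manifest, keeping only variants
+                        # up to 720p.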
+ if re.search(self._USP_RE, href):
+ usp_formats = self._extract_m3u8_formats(
+ re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
+ programme_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False)
+ for f in usp_formats:
+ if f.get('height') and f['height'] > 720:
+ continue
+ formats.append(f)
+ elif transfer_format == 'hds':
+ formats.extend(self._extract_f4m_formats(
+ href, programme_id, f4m_id=format_id, fatal=False))
+ else:
+ if not service and not supplier and bitrate:
+ format_id += '-%d' % bitrate
+ fmt = {
+ 'format_id': format_id,
+ 'filesize': file_size,
+ }
+ if kind == 'video':
+ fmt.update({
+ 'width': width,
+ 'height': height,
+ 'tbr': bitrate,
+ 'vcodec': encoding,
+ })
+ else:
+ fmt.update({
+ 'abr': bitrate,
+ 'acodec': encoding,
+ 'vcodec': 'none',
+ })
+ if protocol in ('http', 'https'):
+ # Direct link
+ fmt.update({
+ 'url': href,
+ })
+ elif protocol == 'rtmp':
+ application = connection.get('application', 'ondemand')
+ auth_string = connection.get('authString')
+ identifier = connection.get('identifier')
+ server = connection.get('server')
+ fmt.update({
+ 'url': '%s://%s/%s?%s' % (protocol, server, application, auth_string),
+ 'play_path': identifier,
+ 'app': '%s?%s' % (application, auth_string),
+ 'page_url': 'http://www.bbc.co.uk',
+ 'player_url': 'http://www.bbc.co.uk/emp/releases/iplayer/revisions/617463_618125_4/617463_618125_4_emp.swf',
+ 'rtmp_live': False,
+ 'ext': 'flv',
+ })
+ else:
+ continue
+ formats.append(fmt)
+ elif kind == 'captions':
+ subtitles = self.extract_subtitles(media, programme_id)
+ return formats, subtitles
+
+ def _download_playlist(self, playlist_id):
+ try:
+ playlist = self._download_json(
+ 'http://www.bbc.co.uk/programmes/%s/playlist.json' % playlist_id,
+ playlist_id, 'Downloading playlist JSON')
+
+ version = playlist.get('defaultAvailableVersion')
+ if version:
+ smp_config = version['smpConfig']
+ title = smp_config['title']
+ description = smp_config['summary']
+ for item in smp_config['items']:
+ kind = item['kind']
+ if kind not in ('programme', 'radioProgramme'):
+ continue
+ programme_id = item.get('vpid')
+ duration = int_or_none(item.get('duration'))
+ formats, subtitles = self._download_media_selector(programme_id)
+ return programme_id, title, description, duration, formats, subtitles
+ except ExtractorError as ee:
+ if not (isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 404):
+ raise
+
+ # fallback to legacy playlist
+ return self._process_legacy_playlist(playlist_id)
+
+ def _process_legacy_playlist_url(self, url, display_id):
+ playlist = self._download_legacy_playlist_url(url, display_id)
+ return self._extract_from_legacy_playlist(playlist, display_id)
+
+ def _process_legacy_playlist(self, playlist_id):
+ return self._process_legacy_playlist_url(
+ 'http://www.bbc.co.uk/iplayer/playlist/%s' % playlist_id, playlist_id)
+
+ def _download_legacy_playlist_url(self, url, playlist_id=None):
+ return self._download_xml(
+ url, playlist_id, 'Downloading legacy playlist XML')
+
+ def _extract_from_legacy_playlist(self, playlist, playlist_id):
+ no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
+ if no_items is not None:
+ reason = no_items.get('reason')
+ if reason == 'preAvailability':
+ msg = 'Episode %s is not yet available' % playlist_id
+ elif reason == 'postAvailability':
+ msg = 'Episode %s is no longer available' % playlist_id
+ elif reason == 'noMedia':
+ msg = 'Episode %s is not currently available' % playlist_id
+ else:
+ msg = 'Episode %s is not available: %s' % (playlist_id, reason)
+ raise ExtractorError(msg, expected=True)
+
+ for item in self._extract_items(playlist):
+ kind = item.get('kind')
+ if kind not in ('programme', 'radioProgramme'):
+ continue
+ title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
+ description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
+ description = description_el.text if description_el is not None else None
+
+ def get_programme_id(item):
+ def get_from_attributes(item):
+ for p in ('identifier', 'group'):
+ value = item.get(p)
+ if value and re.match(r'^[pb][\da-z]{7}$', value):
+ return value
+                value = get_from_attributes(item)
+                if value:
+                    return value
+ mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
+ if mediator is not None:
+ return get_from_attributes(mediator)
+
+ programme_id = get_programme_id(item)
+ duration = int_or_none(item.get('duration'))
+
+ if programme_id:
+ formats, subtitles = self._download_media_selector(programme_id)
+ else:
+ formats, subtitles = self._process_media_selector(item, playlist_id)
+ programme_id = playlist_id
+
+ return programme_id, title, description, duration, formats, subtitles
+
+ def _real_extract(self, url):
+ group_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, group_id, 'Downloading video page')
+
+ error = self._search_regex(
+ r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ programme_id = None
+ duration = None
+
+ tviplayer = self._search_regex(
+ r'mediator\.bind\(({.+?})\s*,\s*document\.getElementById',
+ webpage, 'player', default=None)
+
+ if tviplayer:
+ player = self._parse_json(tviplayer, group_id).get('player', {})
+ duration = int_or_none(player.get('duration'))
+ programme_id = player.get('vpid')
+
+ if not programme_id:
+ programme_id = self._search_regex(
+ r'"vpid"\s*:\s*"(%s)"' % self._ID_REGEX, webpage, 'vpid', fatal=False, default=None)
+
+ if programme_id:
+ formats, subtitles = self._download_media_selector(programme_id)
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ (r'<h2[^>]+id="parent-title"[^>]*>(.+?)</h2>',
+ r'<div[^>]+class="info"[^>]*>\s*<h1>(.+?)</h1>'), webpage, 'title')
+ description = self._search_regex(
+ (r'<p class="[^"]*medium-description[^"]*">([^<]+)</p>',
+ r'<div[^>]+class="info_+synopsis"[^>]*>([^<]+)</div>'),
+ webpage, 'description', default=None)
+ if not description:
+ description = self._html_search_meta('description', webpage)
+ else:
+ programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class BBCIE(BBCCoUkIE):
+ IE_NAME = 'bbc'
+ IE_DESC = 'BBC'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'
+
+ _MEDIASELECTOR_URLS = [
+        # Provides HQ HLS streams but fails with geolocation in some cases even
+        # when the programme is not geo restricted at all
+ 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
+        # Provides more formats, namely direct mp4 links, but fails on some
+        # videos with notukerror for non-UK (?) users (e.g.
+ # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
+ 'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
+ # Provides fewer formats, but works everywhere for everybody (hopefully)
+ 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
+ ]
+
+ _TESTS = [{
+ # article with multiple videos embedded with data-playable containing vpids
+ 'url': 'http://www.bbc.com/news/world-europe-32668511',
+ 'info_dict': {
+ 'id': 'world-europe-32668511',
+ 'title': 'Russia stages massive WW2 parade',
+ 'description': 'md5:00ff61976f6081841f759a08bf78cc9c',
+ },
+ 'playlist_count': 2,
+ }, {
+ # article with multiple videos embedded with data-playable (more videos)
+ 'url': 'http://www.bbc.com/news/business-28299555',
+ 'info_dict': {
+ 'id': 'business-28299555',
+ 'title': 'Farnborough Airshow: Video highlights',
+ 'description': 'BBC reports and video highlights at the Farnborough Airshow.',
+ },
+ 'playlist_count': 9,
+ 'skip': 'Save time',
+ }, {
+ # article with multiple videos embedded with `new SMP()`
+ # broken
+ 'url': 'http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460',
+ 'info_dict': {
+ 'id': '3662a707-0af9-3149-963f-47bea720b460',
+ 'title': 'BUGGER',
+ },
+ 'playlist_count': 18,
+ }, {
+ # single video embedded with data-playable containing vpid
+ 'url': 'http://www.bbc.com/news/world-europe-32041533',
+ 'info_dict': {
+ 'id': 'p02mprgb',
+ 'ext': 'mp4',
+ 'title': 'Aerial footage showed the site of the crash in the Alps - courtesy BFM TV',
+ 'description': 'md5:2868290467291b37feda7863f7a83f54',
+ 'duration': 47,
+ 'timestamp': 1427219242,
+ 'upload_date': '20150324',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # article with single video embedded with data-playable containing XML playlist
+ # with direct video links as progressiveDownloadUrl (for now these are extracted)
+ # and playlist with f4m and m3u8 as streamingUrl
+ 'url': 'http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu',
+ 'info_dict': {
+ 'id': '150615_telabyad_kentin_cogu',
+ 'ext': 'mp4',
+ 'title': "YPG: Tel Abyad'ın tamamı kontrolümüzde",
+ 'description': 'md5:33a4805a855c9baf7115fcbde57e7025',
+ 'timestamp': 1434397334,
+ 'upload_date': '20150615',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # single video embedded with data-playable containing XML playlists (regional section)
+ 'url': 'http://www.bbc.com/mundo/video_fotos/2015/06/150619_video_honduras_militares_hospitales_corrupcion_aw',
+ 'info_dict': {
+ 'id': '150619_video_honduras_militares_hospitales_corrupcion_aw',
+ 'ext': 'mp4',
+ 'title': 'Honduras militariza sus hospitales por nuevo escándalo de corrupción',
+ 'description': 'md5:1525f17448c4ee262b64b8f0c9ce66c8',
+ 'timestamp': 1434713142,
+ 'upload_date': '20150619',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # single video from video playlist embedded with vxp-playlist-data JSON
+ 'url': 'http://www.bbc.com/news/video_and_audio/must_see/33376376',
+ 'info_dict': {
+ 'id': 'p02w6qjc',
+ 'ext': 'mp4',
+ 'title': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
+ 'duration': 56,
+ 'description': '''Judge Mindy Glazer: "I'm sorry to see you here... I always wondered what happened to you"''',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # single video story with digitalData
+ 'url': 'http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret',
+ 'info_dict': {
+ 'id': 'p02q6gc4',
+ 'ext': 'flv',
+ 'title': 'Sri Lanka’s spicy secret',
+ 'description': 'As a new train line to Jaffna opens up the country’s north, travellers can experience a truly distinct slice of Tamil culture.',
+ 'timestamp': 1437674293,
+ 'upload_date': '20150723',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # single video story without digitalData
+ 'url': 'http://www.bbc.com/autos/story/20130513-hyundais-rock-star',
+ 'info_dict': {
+ 'id': 'p018zqqg',
+ 'ext': 'mp4',
+ 'title': 'Hyundai Santa Fe Sport: Rock star',
+ 'description': 'md5:b042a26142c4154a6e472933cf20793d',
+ 'timestamp': 1415867444,
+ 'upload_date': '20141113',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # single video embedded with Morph
+ 'url': 'http://www.bbc.co.uk/sport/live/olympics/36895975',
+ 'info_dict': {
+ 'id': 'p041vhd0',
+ 'ext': 'mp4',
+ 'title': "Nigeria v Japan - Men's First Round",
+ 'description': 'Live coverage of the first round from Group B at the Amazonia Arena.',
+ 'duration': 7980,
+ 'uploader': 'BBC Sport',
+ 'uploader_id': 'bbc_sport',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Georestricted to UK',
+ }, {
+ # single video with playlist.sxml URL in playlist param
+ 'url': 'http://www.bbc.com/sport/0/football/33653409',
+ 'info_dict': {
+ 'id': 'p02xycnp',
+ 'ext': 'mp4',
+ 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
+ 'description': 'BBC Sport\'s David Ornstein has the latest transfer gossip, including rumours of a Manchester United return for Cristiano Ronaldo.',
+ 'duration': 140,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ }
+ }, {
+ # article with multiple videos embedded with playlist.sxml in playlist param
+ 'url': 'http://www.bbc.com/sport/0/football/34475836',
+ 'info_dict': {
+ 'id': '34475836',
+ 'title': 'Jurgen Klopp: Furious football from a witty and winning coach',
+ 'description': 'Fast-paced football, wit, wisdom and a ready smile - why Liverpool fans should come to love new boss Jurgen Klopp.',
+ },
+ 'playlist_count': 3,
+ }, {
+ # school report article with single video
+ 'url': 'http://www.bbc.co.uk/schoolreport/35744779',
+ 'info_dict': {
+ 'id': '35744779',
+ 'title': 'School which breaks down barriers in Jerusalem',
+ },
+ 'playlist_count': 1,
+ }, {
+ # single video with playlist URL from weather section
+ 'url': 'http://www.bbc.com/weather/features/33601775',
+ 'only_matching': True,
+ }, {
+ # custom redirection to www.bbc.com
+ 'url': 'http://www.bbc.co.uk/news/science-environment-33661876',
+ 'only_matching': True,
+ }, {
+ # single video article embedded with data-media-vpid
+ 'url': 'http://www.bbc.co.uk/sport/rowing/35908187',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bbc.co.uk/bbcthree/clip/73d0bbd0-abc3-4cea-b3c0-cdae21905eb1',
+ 'info_dict': {
+ 'id': 'p06556y7',
+ 'ext': 'mp4',
+ 'title': 'Transfers: Cristiano Ronaldo to Man Utd, Arsenal to spend?',
+ 'description': 'md5:4b7dfd063d5a789a1512e99662be3ddd',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }, {
+ # window.__PRELOADED_STATE__
+ 'url': 'https://www.bbc.co.uk/radio/play/b0b9z4yl',
+ 'info_dict': {
+ 'id': 'b0b9z4vz',
+ 'ext': 'mp4',
+ 'title': 'Prom 6: An American in Paris and Turangalila',
+ 'description': 'md5:51cf7d6f5c8553f197e58203bc78dff8',
+ 'uploader': 'Radio 3',
+ 'uploader_id': 'bbc_radio_three',
+ },
+ }, {
+ 'url': 'http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227',
+ 'info_dict': {
+ 'id': 'p06w9tws',
+ 'ext': 'mp4',
+ 'title': 'md5:2fabf12a726603193a2879a055f72514',
+ 'description': 'Learn English words and phrases from this story',
+ },
+ 'add_ie': [BBCCoUkIE.ie_key()],
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ EXCLUDE_IE = (BBCCoUkIE, BBCCoUkArticleIE, BBCCoUkIPlayerPlaylistIE, BBCCoUkPlaylistIE)
+ return (False if any(ie.suitable(url) for ie in EXCLUDE_IE)
+ else super(BBCIE, cls).suitable(url))
+
+ def _extract_from_media_meta(self, media_meta, video_id):
+ # Direct links to media in media metadata (e.g.
+ # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
+ # TODO: there are also f4m and m3u8 streams incorporated in playlist.sxml
+ source_files = media_meta.get('sourceFiles')
+ if source_files:
+ return [{
+ 'url': f['url'],
+ 'format_id': format_id,
+ 'ext': f.get('encoding'),
+ 'tbr': float_or_none(f.get('bitrate'), 1000),
+ 'filesize': int_or_none(f.get('filesize')),
+ } for format_id, f in source_files.items() if f.get('url')], []
+
+ programme_id = media_meta.get('externalId')
+ if programme_id:
+ return self._download_media_selector(programme_id)
+
+ # Process playlist.sxml as legacy playlist
+ href = media_meta.get('href')
+ if href:
+ playlist = self._download_legacy_playlist_url(href)
+ _, _, _, _, formats, subtitles = self._extract_from_legacy_playlist(playlist, video_id)
+ return formats, subtitles
+
+ return [], []
+
+ def _extract_from_playlist_sxml(self, url, playlist_id, timestamp):
+ programme_id, title, description, duration, formats, subtitles = \
+ self._process_legacy_playlist_url(url, playlist_id)
+ self._sort_formats(formats)
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ json_ld_info = self._search_json_ld(webpage, playlist_id, default={})
+ timestamp = json_ld_info.get('timestamp')
+
+ playlist_title = json_ld_info.get('title')
+ if not playlist_title:
+ playlist_title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'playlist title', default=None)
+ if playlist_title:
+ playlist_title = re.sub(r'(.+)\s*-\s*BBC.*?$', r'\1', playlist_title).strip()
+
+ playlist_description = json_ld_info.get(
+ 'description') or self._og_search_description(webpage, default=None)
+
+ if not timestamp:
+ timestamp = parse_iso8601(self._search_regex(
+ [r'<meta[^>]+property="article:published_time"[^>]+content="([^"]+)"',
+ r'itemprop="datePublished"[^>]+datetime="([^"]+)"',
+ r'"datePublished":\s*"([^"]+)'],
+ webpage, 'date', default=None))
+
+ entries = []
+
+ # article with multiple videos embedded with playlist.sxml (e.g.
+ # http://www.bbc.com/sport/0/football/34475836)
+ playlists = re.findall(r'<param[^>]+name="playlist"[^>]+value="([^"]+)"', webpage)
+ playlists.extend(re.findall(r'data-media-id="([^"]+/playlist\.sxml)"', webpage))
+ if playlists:
+ entries = [
+ self._extract_from_playlist_sxml(playlist_url, playlist_id, timestamp)
+ for playlist_url in playlists]
+
+ # news article with multiple videos embedded with data-playable
+ data_playables = re.findall(r'data-playable=(["\'])({.+?})\1', webpage)
+ if data_playables:
+ for _, data_playable_json in data_playables:
+ data_playable = self._parse_json(
+ unescapeHTML(data_playable_json), playlist_id, fatal=False)
+ if not data_playable:
+ continue
+ settings = data_playable.get('settings', {})
+ if settings:
+ # data-playable with video vpid in settings.playlistObject.items (e.g.
+ # http://www.bbc.com/news/world-us-canada-34473351)
+ playlist_object = settings.get('playlistObject', {})
+ if playlist_object:
+ items = playlist_object.get('items')
+ if items and isinstance(items, list):
+ title = playlist_object['title']
+ description = playlist_object.get('summary')
+ duration = int_or_none(items[0].get('duration'))
+ programme_id = items[0].get('vpid')
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ entries.append({
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ else:
+                        # data-playable without vpid but with playlist.sxml URLs
+ # in otherSettings.playlist (e.g.
+ # http://www.bbc.com/turkce/multimedya/2015/10/151010_vid_ankara_patlama_ani)
+ playlist = data_playable.get('otherSettings', {}).get('playlist', {})
+ if playlist:
+ entry = None
+ for key in ('streaming', 'progressiveDownload'):
+ playlist_url = playlist.get('%sUrl' % key)
+ if not playlist_url:
+ continue
+ try:
+ info = self._extract_from_playlist_sxml(
+ playlist_url, playlist_id, timestamp)
+ if not entry:
+ entry = info
+ else:
+ entry['title'] = info['title']
+ entry['formats'].extend(info['formats'])
+ except Exception as e:
+                                # One playlist URL may fail with 500 while the
+                                # other one works fine (e.g.
+                                # http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu)
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500:
+ continue
+ raise
+ if entry:
+ self._sort_formats(entry['formats'])
+ entries.append(entry)
+
+ if entries:
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+ # http://www.bbc.co.uk/learningenglish/chinese/features/lingohack/ep-181227
+ group_id = self._search_regex(
+ r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
+ webpage, 'group id', default=None)
+        if group_id:
+ return self.url_result(
+ 'https://www.bbc.co.uk/programmes/%s' % group_id,
+ ie=BBCCoUkIE.ie_key())
+
+ # single video story (e.g. http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
+ programme_id = self._search_regex(
+ [r'data-(?:video-player|media)-vpid="(%s)"' % self._ID_REGEX,
+ r'<param[^>]+name="externalIdentifier"[^>]+value="(%s)"' % self._ID_REGEX,
+ r'videoId\s*:\s*["\'](%s)["\']' % self._ID_REGEX],
+ webpage, 'vpid', default=None)
+
+ if programme_id:
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ # digitalData may be missing (e.g. http://www.bbc.com/autos/story/20130513-hyundais-rock-star)
+ digital_data = self._parse_json(
+ self._search_regex(
+ r'var\s+digitalData\s*=\s*({.+?});?\n', webpage, 'digital data', default='{}'),
+ programme_id, fatal=False)
+ page_info = digital_data.get('page', {}).get('pageInfo', {})
+ title = page_info.get('pageName') or self._og_search_title(webpage)
+ description = page_info.get('description') or self._og_search_description(webpage)
+ timestamp = parse_iso8601(page_info.get('publicationDate')) or timestamp
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ # Morph based embed (e.g. http://www.bbc.co.uk/sport/live/olympics/36895975)
+        # Several setPayload calls may be present, but the video always seems
+        # to be related to the first one
+ morph_payload = self._parse_json(
+ self._search_regex(
+ r'Morph\.setPayload\([^,]+,\s*({.+?})\);',
+ webpage, 'morph payload', default='{}'),
+ playlist_id, fatal=False)
+ if morph_payload:
+ components = try_get(morph_payload, lambda x: x['body']['components'], list) or []
+ for component in components:
+ if not isinstance(component, dict):
+ continue
+ lead_media = try_get(component, lambda x: x['props']['leadMedia'], dict)
+ if not lead_media:
+ continue
+ identifiers = lead_media.get('identifiers')
+ if not identifiers or not isinstance(identifiers, dict):
+ continue
+ programme_id = identifiers.get('vpid') or identifiers.get('playablePid')
+ if not programme_id:
+ continue
+ title = lead_media.get('title') or self._og_search_title(webpage)
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ description = lead_media.get('summary')
+ uploader = lead_media.get('masterBrand')
+ uploader_id = lead_media.get('mid')
+ duration = None
+ duration_d = lead_media.get('duration')
+ if isinstance(duration_d, dict):
+ duration = parse_duration(dict_get(
+ duration_d, ('rawDuration', 'formattedDuration', 'spokenDuration')))
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ preload_state = self._parse_json(self._search_regex(
+ r'window\.__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
+ 'preload state', default='{}'), playlist_id, fatal=False)
+ if preload_state:
+ current_programme = preload_state.get('programmes', {}).get('current') or {}
+ programme_id = current_programme.get('id')
+ if current_programme and programme_id and current_programme.get('type') == 'playable_item':
+ title = current_programme.get('titles', {}).get('tertiary') or playlist_title
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ synopses = current_programme.get('synopses') or {}
+ network = current_programme.get('network') or {}
+ duration = int_or_none(
+ current_programme.get('duration', {}).get('value'))
+ thumbnail = None
+ image_url = current_programme.get('image_url')
+ if image_url:
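+ # image_url embeds a '{recipe}' placeholder for the requested dimensions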
+ thumbnail = image_url.replace('{recipe}', '1920x1920')
+ return {
+ 'id': programme_id,
+ 'title': title,
+ 'description': dict_get(synopses, ('long', 'medium', 'short')),
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'uploader': network.get('short_title'),
+ 'uploader_id': network.get('id'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ bbc3_config = self._parse_json(
+ self._search_regex(
+ r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
+ 'bbcthree config', default='{}'),
+ playlist_id, transform_source=js_to_json, fatal=False)
+ if bbc3_config:
+ bbc3_playlist = try_get(
+ bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'],
+ dict)
+ if bbc3_playlist:
+ playlist_title = bbc3_playlist.get('title') or playlist_title
+ thumbnail = bbc3_playlist.get('holdingImageURL')
+ entries = []
+ for bbc3_item in bbc3_playlist['items']:
+ programme_id = bbc3_item.get('versionID')
+ if not programme_id:
+ continue
+ formats, subtitles = self._download_media_selector(programme_id)
+ self._sort_formats(formats)
+ entries.append({
+ 'id': programme_id,
+ 'title': playlist_title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
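+ # Helper: parse every JSON object matched by pattern, dropping any that fail to parse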
+ def extract_all(pattern):
+ return list(filter(None, map(
+ lambda s: self._parse_json(s, playlist_id, fatal=False),
+ re.findall(pattern, webpage))))
+
+ # Multiple video article (e.g.
+ # http://www.bbc.co.uk/blogs/adamcurtis/entries/3662a707-0af9-3149-963f-47bea720b460)
+ EMBED_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:[^/]+/)+%s(?:\b[^"]+)?' % self._ID_REGEX
+ entries = []
+ for match in extract_all(r'new\s+SMP\(({.+?})\)'):
+ embed_url = match.get('playerSettings', {}).get('externalEmbedUrl')
+ if embed_url and re.match(EMBED_URL, embed_url):
+ entries.append(embed_url)
+ entries.extend(re.findall(
+ r'setPlaylist\("(%s)"\)' % EMBED_URL, webpage))
+ if entries:
+ return self.playlist_result(
+ [self.url_result(entry_, 'BBCCoUk') for entry_ in entries],
+ playlist_id, playlist_title, playlist_description)
+
+ # Multiple video article (e.g. http://www.bbc.com/news/world-europe-32668511)
+ medias = extract_all(r"data-media-meta='({[^']+})'")
+
+ if not medias:
+ # Single video article (e.g. http://www.bbc.com/news/video_and_audio/international)
+ media_asset = self._search_regex(
+ r'mediaAssetPage\.init\(\s*({.+?}), "/',
+ webpage, 'media asset', default=None)
+ if media_asset:
+ media_asset_page = self._parse_json(media_asset, playlist_id, fatal=False) or {}
+ medias = []
+ for video in media_asset_page.get('videos', {}).values():
+ medias.extend(video.values())
+
+ if not medias:
+ # Multiple video playlist with a single `now playing` entry (e.g.
+ # http://www.bbc.com/news/video_and_audio/must_see/33767813)
+ vxp_playlist = self._parse_json(
+ self._search_regex(
+ r'<script[^>]+class="vxp-playlist-data"[^>]+type="application/json"[^>]*>([^<]+)</script>',
+ webpage, 'playlist data'),
+ playlist_id)
+ playlist_medias = []
+ for item in vxp_playlist:
+ media = item.get('media')
+ if not media:
+ continue
+ playlist_medias.append(media)
+ # Download a single video if we find media whose asset id matches the video id from the URL
+ if item.get('advert', {}).get('assetId') == playlist_id:
+ medias = [media]
+ break
+ # Fall back to the whole playlist
+ if not medias:
+ medias = playlist_medias
+
+ entries = []
+ for num, media_meta in enumerate(medias, start=1):
+ formats, subtitles = self._extract_from_media_meta(media_meta, playlist_id)
+ if not formats:
+ continue
+ self._sort_formats(formats)
+
+ video_id = media_meta.get('externalId')
+ if not video_id:
+ video_id = playlist_id if len(medias) == 1 else '%s-%s' % (playlist_id, num)
+
+ title = media_meta.get('caption')
+ if not title:
+ title = playlist_title if len(medias) == 1 else '%s - Video %s' % (playlist_title, num)
+
+ duration = int_or_none(media_meta.get('durationInSeconds')) or parse_duration(media_meta.get('duration'))
+
+ images = []
+ for image in media_meta.get('images', {}).values():
+ images.extend(image.values())
+ if 'image' in media_meta:
+ images.append(media_meta['image'])
+
+ thumbnails = [{
+ 'url': image.get('href'),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in images]
+
+ entries.append({
+ 'id': video_id,
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+
+class BBCCoUkArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/articles/(?P<id>[a-zA-Z0-9]+)'
+ IE_NAME = 'bbc.co.uk:article'
+ IE_DESC = 'BBC articles'
+
+ _TEST = {
+ 'url': 'http://www.bbc.co.uk/programmes/articles/3jNQLTMrPlYGTBn0WV6M2MS/not-your-typical-role-model-ada-lovelace-the-19th-century-programmer',
+ 'info_dict': {
+ 'id': '3jNQLTMrPlYGTBn0WV6M2MS',
+ 'title': 'Calculating Ada: The Countess of Computing - Not your typical role model: Ada Lovelace the 19th century programmer - BBC Four',
+ 'description': 'Hannah Fry reveals some of her surprising discoveries about Ada Lovelace during filming.',
+ },
+ 'playlist_count': 4,
+ 'add_ie': ['BBCCoUk'],
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage).strip()
+
+ entries = [self.url_result(programme_url) for programme_url in re.findall(
+ r'<div[^>]+typeof="Clip"[^>]+resource="([^"]+)"', webpage)]
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class BBCCoUkPlaylistBaseIE(InfoExtractor):
+ def _entries(self, webpage, url, playlist_id):
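+ # Yield every video id found on the current page, then follow the
+ # 'pagination next' link; stop after one page when an explicit
+ # ?page= parameter was requested in the URL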
+ single_page = 'page' in compat_urlparse.parse_qs(
+ compat_urlparse.urlparse(url).query)
+ for page_num in itertools.count(2):
+ for video_id in re.findall(
+ self._VIDEO_ID_TEMPLATE % BBCCoUkIE._ID_REGEX, webpage):
+ yield self.url_result(
+ self._URL_TEMPLATE % video_id, BBCCoUkIE.ie_key())
+ if single_page:
+ return
+ next_page = self._search_regex(
+ r'<li[^>]+class=(["\'])pagination_+next\1[^>]*><a[^>]+href=(["\'])(?P<url>(?:(?!\2).)+)\2',
+ webpage, 'next page url', default=None, group='url')
+ if not next_page:
+ break
+ webpage = self._download_webpage(
+ compat_urlparse.urljoin(url, next_page), playlist_id,
+ 'Downloading page %d' % page_num, page_num)
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ title, description = self._extract_title_and_description(webpage)
+
+ return self.playlist_result(
+ self._entries(webpage, url, playlist_id),
+ playlist_id, title, description)
+
+
+class BBCCoUkIPlayerPlaylistIE(BBCCoUkPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:iplayer:playlist'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/iplayer/(?:episodes|group)/(?P<id>%s)' % BBCCoUkIE._ID_REGEX
+ _URL_TEMPLATE = 'http://www.bbc.co.uk/iplayer/episode/%s'
+ _VIDEO_ID_TEMPLATE = r'data-ip-id=["\'](%s)'
+ _TESTS = [{
+ 'url': 'http://www.bbc.co.uk/iplayer/episodes/b05rcz9v',
+ 'info_dict': {
+ 'id': 'b05rcz9v',
+ 'title': 'The Disappearance',
+ 'description': 'French thriller serial about a missing teenager.',
+ },
+ 'playlist_mincount': 6,
+ 'skip': 'This programme is not currently available on BBC iPlayer',
+ }, {
+ # Available for over a year, unlike the 30 days for most other programmes
+ 'url': 'http://www.bbc.co.uk/iplayer/group/p02tcc32',
+ 'info_dict': {
+ 'id': 'p02tcc32',
+ 'title': 'Bohemian Icons',
+ 'description': 'md5:683e901041b2fe9ba596f2ab04c4dbe7',
+ },
+ 'playlist_mincount': 10,
+ }]
+
+ def _extract_title_and_description(self, webpage):
+ title = self._search_regex(r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
+ description = self._search_regex(
+ r'<p[^>]+class=(["\'])subtitle\1[^>]*>(?P<value>[^<]+)</p>',
+ webpage, 'description', fatal=False, group='value')
+ return title, description
+
+
+class BBCCoUkPlaylistIE(BBCCoUkPlaylistBaseIE):
+ IE_NAME = 'bbc.co.uk:playlist'
+ _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/programmes/(?P<id>%s)/(?:episodes|broadcasts|clips)' % BBCCoUkIE._ID_REGEX
+ _URL_TEMPLATE = 'http://www.bbc.co.uk/programmes/%s'
+ _VIDEO_ID_TEMPLATE = r'data-pid=["\'](%s)'
+ _TESTS = [{
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+ 'info_dict': {
+ 'id': 'b05rcz9v',
+ 'title': 'The Disappearance - Clips - BBC Four',
+ 'description': 'French thriller serial about a missing teenager.',
+ },
+ 'playlist_mincount': 7,
+ }, {
+ # multipage playlist, explicit page
+ 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips?page=1',
+ 'info_dict': {
+ 'id': 'b00mfl7n',
+ 'title': 'Frozen Planet - Clips - BBC One',
+ 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ # multipage playlist, all pages
+ 'url': 'http://www.bbc.co.uk/programmes/b00mfl7n/clips',
+ 'info_dict': {
+ 'id': 'b00mfl7n',
+ 'title': 'Frozen Planet - Clips - BBC One',
+ 'description': 'md5:65dcbf591ae628dafe32aa6c4a4a0d8c',
+ },
+ 'playlist_mincount': 142,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/broadcasts/2016/06',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b05rcz9v/clips',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bbc.co.uk/programmes/b055jkys/episodes/player',
+ 'only_matching': True,
+ }]
+
+ def _extract_title_and_description(self, webpage):
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._og_search_description(webpage)
+ return title, description
diff --git a/youtube_dl/extractor/beampro.py b/youtube_dlc/extractor/beampro.py
index 86abdae00..86abdae00 100644
--- a/youtube_dl/extractor/beampro.py
+++ b/youtube_dlc/extractor/beampro.py
diff --git a/youtube_dl/extractor/beatport.py b/youtube_dlc/extractor/beatport.py
index e60709417..e60709417 100644
--- a/youtube_dl/extractor/beatport.py
+++ b/youtube_dlc/extractor/beatport.py
diff --git a/youtube_dl/extractor/beeg.py b/youtube_dlc/extractor/beeg.py
index 5788d13ba..5788d13ba 100644
--- a/youtube_dl/extractor/beeg.py
+++ b/youtube_dlc/extractor/beeg.py
diff --git a/youtube_dl/extractor/behindkink.py b/youtube_dlc/extractor/behindkink.py
index 9bca853b3..9bca853b3 100644
--- a/youtube_dl/extractor/behindkink.py
+++ b/youtube_dlc/extractor/behindkink.py
diff --git a/youtube_dlc/extractor/bellmedia.py b/youtube_dlc/extractor/bellmedia.py
new file mode 100644
index 000000000..9f9de96c6
--- /dev/null
+++ b/youtube_dlc/extractor/bellmedia.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class BellMediaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?P<domain>
+ (?:
+ ctv|
+ tsn|
+ bnn(?:bloomberg)?|
+ thecomedynetwork|
+ discovery|
+ discoveryvelocity|
+ sciencechannel|
+ investigationdiscovery|
+ animalplanet|
+ bravo|
+ mtv|
+ space|
+ etalk|
+ marilyn
+ )\.ca|
+ (?:much|cp24)\.com
+ )/.*?(?:\b(?:vid(?:eoid)?|clipId)=|-vid|~|%7E|/(?:episode)?)(?P<id>[0-9]{6,})'''
+ _TESTS = [{
+ 'url': 'https://www.bnnbloomberg.ca/video/david-cockfield-s-top-picks~1403070',
+ 'md5': '36d3ef559cfe8af8efe15922cd3ce950',
+ 'info_dict': {
+ 'id': '1403070',
+ 'ext': 'flv',
+ 'title': 'David Cockfield\'s Top Picks',
+ 'description': 'md5:810f7f8c6a83ad5b48677c3f8e5bb2c3',
+ 'upload_date': '20180525',
+ 'timestamp': 1527288600,
+ },
+ }, {
+ 'url': 'http://www.thecomedynetwork.ca/video/player?vid=923582',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.tsn.ca/video/expectations-high-for-milos-raonic-at-us-open~939549',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bnn.ca/video/berman-s-call-part-two-viewer-questions~939654',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ctv.ca/YourMorning/Video/S1E6-Monday-August-29-2016-vid938009',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.much.com/shows/atmidnight/episode948007/tuesday-september-13-2016',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.much.com/shows/the-almost-impossible-gameshow/928979/episode-6',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ctv.ca/DCs-Legends-of-Tomorrow/Video/S2E11-Turncoat-vid1051430',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.etalk.ca/video?videoid=663455',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.cp24.com/video?clipId=1982548',
+ 'only_matching': True,
+ }]
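+ # Maps a site's subdomain to its 9c9media destination code; domains
+ # not listed here are passed through as-is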
+ _DOMAINS = {
+ 'thecomedynetwork': 'comedy',
+ 'discoveryvelocity': 'discvel',
+ 'sciencechannel': 'discsci',
+ 'investigationdiscovery': 'invdisc',
+ 'animalplanet': 'aniplan',
+ 'etalk': 'ctv',
+ 'bnnbloomberg': 'bnn',
+ 'marilyn': 'ctv_marilyn',
+ }
+
+ def _real_extract(self, url):
+ domain, video_id = re.match(self._VALID_URL, url).groups()
+ domain = domain.split('.')[0]
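+ # Delegate to the NineCNineMedia extractor,
+ # e.g. 'bnnbloomberg' + '1403070' -> '9c9media:bnn_web:1403070'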
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': '9c9media:%s_web:%s' % (self._DOMAINS.get(domain, domain), video_id),
+ 'ie_key': 'NineCNineMedia',
+ }
diff --git a/youtube_dl/extractor/bet.py b/youtube_dlc/extractor/bet.py
index d7ceaa85e..d7ceaa85e 100644
--- a/youtube_dl/extractor/bet.py
+++ b/youtube_dlc/extractor/bet.py
diff --git a/youtube_dl/extractor/bfi.py b/youtube_dlc/extractor/bfi.py
index 60c8944b5..60c8944b5 100644
--- a/youtube_dl/extractor/bfi.py
+++ b/youtube_dlc/extractor/bfi.py
diff --git a/youtube_dl/extractor/bigflix.py b/youtube_dlc/extractor/bigflix.py
index 28e3e59f6..28e3e59f6 100644
--- a/youtube_dl/extractor/bigflix.py
+++ b/youtube_dlc/extractor/bigflix.py
diff --git a/youtube_dl/extractor/bild.py b/youtube_dlc/extractor/bild.py
index b8dfbd42b..b8dfbd42b 100644
--- a/youtube_dl/extractor/bild.py
+++ b/youtube_dlc/extractor/bild.py
diff --git a/youtube_dlc/extractor/bilibili.py b/youtube_dlc/extractor/bilibili.py
new file mode 100644
index 000000000..d39ee8ffe
--- /dev/null
+++ b/youtube_dlc/extractor/bilibili.py
@@ -0,0 +1,450 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ parse_iso8601,
+ smuggle_url,
+ str_or_none,
+ strip_jsonp,
+ unified_timestamp,
+ unsmuggle_url,
+ urlencode_postdata,
+)
+
+
+class BiliBiliIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|bangumi)\.)?
+ bilibili\.(?:tv|com)/
+ (?:
+ (?:
+ video/[aA][vV]|
+ anime/(?P<anime_id>\d+)/play\#
+ )(?P<id_bv>\d+)|
+ video/[bB][vV](?P<id>[^/?#&]+)
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.bilibili.tv/video/av1074402/',
+ 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
+ 'info_dict': {
+ 'id': '1074402',
+ 'ext': 'flv',
+ 'title': '【金坷垃】金泡沫',
+ 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
+ 'duration': 308.067,
+ 'timestamp': 1398012678,
+ 'upload_date': '20140420',
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'uploader': '菊子桑',
+ 'uploader_id': '156160',
+ },
+ }, {
+ # Tested in BiliBiliBangumiIE
+ 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
+ 'md5': '3f721ad1e75030cc06faf73587cfec57',
+ 'info_dict': {
+ 'id': '100643',
+ 'ext': 'mp4',
+ 'title': 'CHAOS;CHILD',
+ 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
+ },
+ 'skip': 'Geo-restricted to China',
+ }, {
+ # Title with double quotes
+ 'url': 'http://www.bilibili.com/video/av8903802/',
+ 'info_dict': {
+ 'id': '8903802',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '8903802_part1',
+ 'ext': 'flv',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+ 'uploader': '阿滴英文',
+ 'uploader_id': '65880958',
+ 'timestamp': 1488382634,
+ 'upload_date': '20170301',
+ },
+ 'params': {
+ 'skip_download': True, # Test metadata only
+ },
+ }, {
+ 'info_dict': {
+ 'id': '8903802_part2',
+ 'ext': 'flv',
+ 'title': '阿滴英文|英文歌分享#6 "Closer',
+ 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
+ 'uploader': '阿滴英文',
+ 'uploader_id': '65880958',
+ 'timestamp': 1488382634,
+ 'upload_date': '20170301',
+ },
+ 'params': {
+ 'skip_download': True, # Test metadata only
+ },
+ }]
+ }, {
+ # new BV video id format
+ 'url': 'https://www.bilibili.com/video/BV1JE411F741',
+ 'only_matching': True,
+ }]
+
+ _APP_KEY = 'iVGUTjsxvpLeuDCf'
+ _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
+
+ def _report_error(self, result):
+ if 'message' in result:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
+ elif 'code' in result:
+ raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
+ else:
+ raise ExtractorError('Can\'t extract Bangumi episode ID')
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = re.match(self._VALID_URL, url)
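+ # Note: despite the names, the 'id_bv' group captures numeric av ids
+ # while 'id' captures the newer BV-prefixed ids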
+ video_id = mobj.group('id') or mobj.group('id_bv')
+ anime_id = mobj.group('anime_id')
+ webpage = self._download_webpage(url, video_id)
+
+ if 'anime/' not in url:
+ cid = self._search_regex(
+ r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
+ default=None
+ ) or compat_parse_qs(self._search_regex(
+ [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
+ r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
+ r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
+ webpage, 'player parameters'))['cid'][0]
+ else:
+ if 'no_bangumi_tip' not in smuggled_data:
+ self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dlc with %s' % (
+ video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
+ headers = {
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': url
+ }
+ headers.update(self.geo_verification_headers())
+
+ js = self._download_json(
+ 'http://bangumi.bilibili.com/web_api/get_source', video_id,
+ data=urlencode_postdata({'episode_id': video_id}),
+ headers=headers)
+ if 'result' not in js:
+ self._report_error(js)
+ cid = js['result']['cid']
+
+ headers = {
+ 'Referer': url
+ }
+ headers.update(self.geo_verification_headers())
+
+ entries = []
+
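+ # Try the high-quality rendition first, falling back to MP4; each
+ # playurl request is signed with sign = md5(payload + _BILIBILI_KEY)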
+ RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
+ for num, rendition in enumerate(RENDITIONS, start=1):
+ payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
+ sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
+
+ video_info = self._download_json(
+ 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
+ video_id, note='Downloading video info page',
+ headers=headers, fatal=num == len(RENDITIONS))
+
+ if not video_info:
+ continue
+
+ if 'durl' not in video_info:
+ if num < len(RENDITIONS):
+ continue
+ self._report_error(video_info)
+
+ for idx, durl in enumerate(video_info['durl']):
+ formats = [{
+ 'url': durl['url'],
+ 'filesize': int_or_none(durl['size']),
+ }]
+ for backup_url in durl.get('backup_url', []):
+ formats.append({
+ 'url': backup_url,
+ # backup URLs have lower priorities
+ 'preference': -2 if 'hd.mp4' in backup_url else -3,
+ })
+
+ for a_format in formats:
+ a_format.setdefault('http_headers', {}).update({
+ 'Referer': url,
+ })
+
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': '%s_part%s' % (video_id, idx),
+ 'duration': float_or_none(durl.get('length'), 1000),
+ 'formats': formats,
+ })
+ break
+
+ title = self._html_search_regex(
+ (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',  # raw string so \b is a word boundary
+ '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+ group='title')
+ description = self._html_search_meta('description', webpage)
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
+ default=None) or self._html_search_meta(
+ 'uploadDate', webpage, 'timestamp', default=None))
+ thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)
+
+ # TODO: 'view_count' requires deobfuscating JavaScript
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail,
+ 'duration': float_or_none(video_info.get('timelength'), scale=1000),
+ }
+
+ uploader_mobj = re.search(
+ r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
+ webpage)
+ if uploader_mobj:
+ info.update({
+ 'uploader': uploader_mobj.group('name'),
+ 'uploader_id': uploader_mobj.group('id'),
+ })
+ if not info.get('uploader'):
+ info['uploader'] = self._html_search_meta(
+ 'author', webpage, 'uploader', default=None)
+
+ for entry in entries:
+ entry.update(info)
+
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ for idx, entry in enumerate(entries):
+ entry['id'] = '%s_part%d' % (video_id, (idx + 1))
+
+ return {
+ '_type': 'multi_video',
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'entries': entries,
+ }
+
+
+class BiliBiliBangumiIE(InfoExtractor):
+ _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
+
+ IE_NAME = 'bangumi.bilibili.com'
+ IE_DESC = 'BiliBili番剧'
+
+ _TESTS = [{
+ 'url': 'http://bangumi.bilibili.com/anime/1869',
+ 'info_dict': {
+ 'id': '1869',
+ 'title': '混沌武士',
+ 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
+ },
+ 'playlist_count': 26,
+ }, {
+ 'url': 'http://bangumi.bilibili.com/anime/1869',
+ 'info_dict': {
+ 'id': '1869',
+ 'title': '混沌武士',
+ 'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
+ },
+ 'playlist': [{
+ 'md5': '91da8621454dd58316851c27c68b0c13',
+ 'info_dict': {
+ 'id': '40062',
+ 'ext': 'mp4',
+ 'title': '混沌武士',
+ 'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
+ 'timestamp': 1414538739,
+ 'upload_date': '20141028',
+ 'episode': '疾风怒涛 Tempestuous Temperaments',
+ 'episode_number': 1,
+ },
+ }],
+ 'params': {
+ 'playlist_items': '1',
+ },
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ bangumi_id = self._match_id(url)
+
+ # Sometimes this API returns a JSONP response
+ season_info = self._download_json(
+ 'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
+ bangumi_id, transform_source=strip_jsonp)['result']
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
+ 'ie_key': BiliBiliIE.ie_key(),
+ 'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
+ 'episode': episode.get('index_title'),
+ 'episode_number': int_or_none(episode.get('index')),
+ } for episode in season_info['episodes']]
+
+ # episode_number may be None, which is unorderable on Python 3
+ entries = sorted(entries, key=lambda entry: entry.get('episode_number') or 0)
+
+ return self.playlist_result(
+ entries, bangumi_id,
+ season_info.get('bangumi_title'), season_info.get('evaluate'))
+
+
+class BilibiliAudioBaseIE(InfoExtractor):
+ def _call_api(self, path, sid, query=None):
+ if not query:
+ query = {'sid': sid}
+ return self._download_json(
+ 'https://www.bilibili.com/audio/music-service-c/web/' + path,
+ sid, query=query)['data']
+
+
+class BilibiliAudioIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/au1003142',
+ 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
+ 'info_dict': {
+ 'id': '1003142',
+ 'ext': 'm4a',
+ 'title': '【tsukimi】YELLOW / 神山羊',
+ 'artist': 'tsukimi',
+ 'comment_count': int,
+ 'description': 'YELLOW的mp3版!',
+ 'duration': 183,
+ 'subtitles': {
+ 'origin': [{
+ 'ext': 'lrc',
+ }],
+ },
+ 'thumbnail': r're:^https?://.+\.jpg',
+ 'timestamp': 1564836614,
+ 'upload_date': '20190803',
+ 'uploader': 'tsukimi-つきみぐー',
+ 'view_count': int,
+ },
+ }
+
+ def _real_extract(self, url):
+ au_id = self._match_id(url)
+
+ play_data = self._call_api('url', au_id)
+ formats = [{
+ 'url': play_data['cdns'][0],
+ 'filesize': int_or_none(play_data.get('size')),
+ }]
+
+ song = self._call_api('song/info', au_id)
+ title = song['title']
+ statistic = song.get('statistic') or {}
+
+ subtitles = None
+ lyric = song.get('lyric')
+ if lyric:
+ subtitles = {
+ 'origin': [{
+ 'url': lyric,
+ }]
+ }
+
+ return {
+ 'id': au_id,
+ 'title': title,
+ 'formats': formats,
+ 'artist': song.get('author'),
+ 'comment_count': int_or_none(statistic.get('comment')),
+ 'description': song.get('intro'),
+ 'duration': int_or_none(song.get('duration')),
+ 'subtitles': subtitles,
+ 'thumbnail': song.get('cover'),
+ 'timestamp': int_or_none(song.get('passtime')),
+ 'uploader': song.get('uname'),
+ 'view_count': int_or_none(statistic.get('play')),
+ }
+
+
+class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.bilibili.com/audio/am10624',
+ 'info_dict': {
+ 'id': '10624',
+ 'title': '每日新曲推荐(每日11:00更新)',
+ 'description': '每天11:00更新,为你推送最新音乐',
+ },
+ 'playlist_count': 19,
+ }
+
+ def _real_extract(self, url):
+ am_id = self._match_id(url)
+
+ songs = self._call_api(
+ 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
+
+ entries = []
+ for song in songs:
+ sid = str_or_none(song.get('id'))
+ if not sid:
+ continue
+ entries.append(self.url_result(
+ 'https://www.bilibili.com/audio/au' + sid,
+ BilibiliAudioIE.ie_key(), sid))
+
+ if entries:
+ album_data = self._call_api('menu/info', am_id) or {}
+ album_title = album_data.get('title')
+ if album_title:
+ for entry in entries:
+ entry['album'] = album_title
+ return self.playlist_result(
+ entries, am_id, album_title, album_data.get('intro'))
+
+ return self.playlist_result(entries, am_id)
+
+
+class BiliBiliPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'http://www.bilibili.tv/video/av%s/' % video_id,
+ ie=BiliBiliIE.ie_key(), video_id=video_id)
diff --git a/youtube_dl/extractor/biobiochiletv.py b/youtube_dlc/extractor/biobiochiletv.py
index dc86c57c5..dc86c57c5 100644
--- a/youtube_dl/extractor/biobiochiletv.py
+++ b/youtube_dlc/extractor/biobiochiletv.py
diff --git a/youtube_dlc/extractor/biqle.py b/youtube_dlc/extractor/biqle.py
new file mode 100644
index 000000000..17ebbb257
--- /dev/null
+++ b/youtube_dlc/extractor/biqle.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .vk import VKIE
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
+from ..utils import int_or_none
+
+
+class BIQLEIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
+ _TESTS = [{
+ # YouTube embed
+ 'url': 'https://biqle.ru/watch/-115995369_456239081',
+ 'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
+ 'info_dict': {
+ 'id': '8v4f-avW-VI',
+ 'ext': 'mp4',
+ 'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
+ 'description': 'Passe-Partout',
+ 'uploader_id': 'mrsimpsonstef3',
+ 'uploader': 'Phanolito',
+ 'upload_date': '20120822',
+ },
+ }, {
+ 'url': 'http://biqle.org/watch/-44781847_168547604',
+ 'md5': '7f24e72af1db0edf7c1aaba513174f97',
+ 'info_dict': {
+ 'id': '-44781847_168547604',
+ 'ext': 'mp4',
+ 'title': 'Ребенок в шоке от автоматической мойки',
+ 'timestamp': 1396633454,
+ 'uploader': 'Dmitry Kotov',
+ 'upload_date': '20140404',
+ 'uploader_id': '47850140',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ embed_url = self._proto_relative_url(self._search_regex(
+ r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
+ webpage, 'embed url'))
+ if VKIE.suitable(embed_url):
+ return self.url_result(embed_url, VKIE.ie_key(), video_id)
+
+ embed_page = self._download_webpage(
+ embed_url, video_id, headers={'Referer': url})
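+ # The embed page exposes a colon-separated 'video_ext' blob
+ # (VK video id, signature, _, access token), either URL-encoded in a
+ # cookie or base64-encoded in the page source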
+ video_ext = self._get_cookies(embed_url).get('video_ext')
+ if video_ext:
+ video_ext = compat_urllib_parse_unquote(video_ext.value)
+ if not video_ext:
+ video_ext = compat_b64decode(self._search_regex(
+ r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
+ embed_page, 'video_ext')).decode()
+ video_id, sig, _, access_token = video_ext.split(':')
+ item = self._download_json(
+ 'https://api.vk.com/method/video.get', video_id,
+ headers={'User-Agent': 'okhttp/3.4.1'}, query={
+ 'access_token': access_token,
+ 'sig': sig,
+ 'v': 5.44,
+ 'videos': video_id,
+ })['response']['items'][0]
+ title = item['title']
+
+ formats = []
+ for f_id, f_url in item.get('files', {}).items():
+ if f_id == 'external':
+ return self.url_result(f_url)
+ ext, height = f_id.split('_')
+ formats.append({
+ 'format_id': height + 'p',
+ 'url': f_url,
+ 'height': int_or_none(height),
+ 'ext': ext,
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for k, v in item.items():
+ if k.startswith('photo_') and v:
+ width = k.replace('photo_', '')
+ thumbnails.append({
+ 'id': width,
+ 'url': v,
+ 'width': int_or_none(width),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'comment_count': int_or_none(item.get('comments')),
+ 'description': item.get('description'),
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnails': thumbnails,
+ 'timestamp': int_or_none(item.get('date')),
+ 'uploader': item.get('owner_id'),
+ 'view_count': int_or_none(item.get('views')),
+ }
diff --git a/youtube_dlc/extractor/bitchute.py b/youtube_dlc/extractor/bitchute.py
new file mode 100644
index 000000000..0c773e66e
--- /dev/null
+++ b/youtube_dlc/extractor/bitchute.py
@@ -0,0 +1,142 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ orderedSet,
+ unified_strdate,
+ urlencode_postdata,
+)
+
+
+class BitChuteIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bitchute\.com/(?:video|embed|torrent/[^/]+)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.bitchute.com/video/szoMrox2JEI/',
+ 'md5': '66c4a70e6bfc40dcb6be3eb1d74939eb',
+ 'info_dict': {
+ 'id': 'szoMrox2JEI',
+ 'ext': 'mp4',
+ 'title': 'Fuck bitches get money',
+ 'description': 'md5:3f21f6fb5b1d17c3dee9cf6b5fe60b3a',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Victoria X Rave',
+ 'upload_date': '20170813',
+ },
+ }, {
+ 'url': 'https://www.bitchute.com/embed/lbb5G1hjPhw/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.bitchute.com/torrent/Zee5BE49045h/szoMrox2JEI.webtorrent',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://www.bitchute.com/video/%s' % video_id, video_id, headers={
+ 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.57 Safari/537.36',
+ })
+
+ title = self._html_search_regex(
+ (r'<[^>]+\bid=["\']video-title[^>]+>([^<]+)', r'<title>([^<]+)'),
+ webpage, 'title', default=None) or self._html_search_meta(
+ 'description', webpage, 'title',
+ default=None) or self._og_search_description(webpage)
+
+ format_urls = []
+ for mobj in re.finditer(
+ r'addWebSeed\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage):
+ format_urls.append(mobj.group('url'))
+ format_urls.extend(re.findall(r'as=(https?://[^&"\']+)', webpage))
+
+ formats = [
+ {'url': format_url}
+ for format_url in orderedSet(format_urls)]
+
+ if not formats:
+ formats = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]['formats']
+
+ self._check_formats(formats, video_id)
+ self._sort_formats(formats)
+
+ description = self._html_search_regex(
+ r'(?s)<div\b[^>]+\bclass=["\']full hidden[^>]+>(.+?)</div>',
+ webpage, 'description', fatal=False)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:image:src', webpage, 'thumbnail')
+ uploader = self._html_search_regex(
+ (r'(?s)<div class=["\']channel-banner.*?<p\b[^>]+\bclass=["\']name[^>]+>(.+?)</p>',
+ r'(?s)<p\b[^>]+\bclass=["\']video-author[^>]+>(.+?)</p>'),
+ webpage, 'uploader', fatal=False)
+
+ upload_date = unified_strdate(self._search_regex(
+ r'class=["\']video-publish-date[^>]+>[^<]+ at \d+:\d+ UTC on (.+?)\.',
+ webpage, 'upload date', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
+
+
+class BitChuteChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bitchute\.com/channel/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.bitchute.com/channel/victoriaxrave/',
+ 'playlist_mincount': 185,
+ 'info_dict': {
+ 'id': 'victoriaxrave',
+ },
+ }
+
+ _TOKEN = 'zyG6tQcGPE5swyAEFLqKUwMuMMuF6IO2DZ6ZDQjGfsL0e4dcTLwqkTTul05Jdve7'
+
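+ # Channel videos are paginated via POSTs to the 'extend/' endpoint;
+ # 'offset' advances by the number of ids scraped from each HTML fragment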
+ def _entries(self, channel_id):
+ channel_url = 'https://www.bitchute.com/channel/%s/' % channel_id
+ offset = 0
+ for page_num in itertools.count(1):
+ data = self._download_json(
+ '%sextend/' % channel_url, channel_id,
+ 'Downloading channel page %d' % page_num,
+ data=urlencode_postdata({
+ 'csrfmiddlewaretoken': self._TOKEN,
+ 'name': '',
+ 'offset': offset,
+ }), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+ 'Referer': channel_url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Cookie': 'csrftoken=%s' % self._TOKEN,
+ })
+ if data.get('success') is False:
+ break
+ html = data.get('html')
+ if not html:
+ break
+ video_ids = re.findall(
+ r'class=["\']channel-videos-image-container[^>]+>\s*<a\b[^>]+\bhref=["\']/video/([^"\'/]+)',
+ html)
+ if not video_ids:
+ break
+ offset += len(video_ids)
+ for video_id in video_ids:
+ yield self.url_result(
+ 'https://www.bitchute.com/video/%s' % video_id,
+ ie=BitChuteIE.ie_key(), video_id=video_id)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ return self.playlist_result(
+ self._entries(channel_id), playlist_id=channel_id)
diff --git a/youtube_dl/extractor/bleacherreport.py b/youtube_dlc/extractor/bleacherreport.py
index dc60224d0..dc60224d0 100644
--- a/youtube_dl/extractor/bleacherreport.py
+++ b/youtube_dlc/extractor/bleacherreport.py
diff --git a/youtube_dl/extractor/blinkx.py b/youtube_dlc/extractor/blinkx.py
index db5e12b21..db5e12b21 100644
--- a/youtube_dl/extractor/blinkx.py
+++ b/youtube_dlc/extractor/blinkx.py
diff --git a/youtube_dl/extractor/bloomberg.py b/youtube_dlc/extractor/bloomberg.py
index 2fbfad1ba..2fbfad1ba 100644
--- a/youtube_dl/extractor/bloomberg.py
+++ b/youtube_dlc/extractor/bloomberg.py
diff --git a/youtube_dl/extractor/bokecc.py b/youtube_dlc/extractor/bokecc.py
index 6017e8344..6017e8344 100644
--- a/youtube_dl/extractor/bokecc.py
+++ b/youtube_dlc/extractor/bokecc.py
diff --git a/youtube_dl/extractor/bostonglobe.py b/youtube_dlc/extractor/bostonglobe.py
index 57882fbee..57882fbee 100644
--- a/youtube_dl/extractor/bostonglobe.py
+++ b/youtube_dlc/extractor/bostonglobe.py
diff --git a/youtube_dl/extractor/bpb.py b/youtube_dlc/extractor/bpb.py
index 07833532e..07833532e 100644
--- a/youtube_dl/extractor/bpb.py
+++ b/youtube_dlc/extractor/bpb.py
diff --git a/youtube_dl/extractor/br.py b/youtube_dlc/extractor/br.py
index 9bde7f2d8..9bde7f2d8 100644
--- a/youtube_dl/extractor/br.py
+++ b/youtube_dlc/extractor/br.py
diff --git a/youtube_dl/extractor/bravotv.py b/youtube_dlc/extractor/bravotv.py
index b9715df00..b9715df00 100644
--- a/youtube_dl/extractor/bravotv.py
+++ b/youtube_dlc/extractor/bravotv.py
diff --git a/youtube_dl/extractor/breakcom.py b/youtube_dlc/extractor/breakcom.py
index 68c7cf2bb..68c7cf2bb 100644
--- a/youtube_dl/extractor/breakcom.py
+++ b/youtube_dlc/extractor/breakcom.py
diff --git a/youtube_dlc/extractor/brightcove.py b/youtube_dlc/extractor/brightcove.py
new file mode 100644
index 000000000..2aa9f4782
--- /dev/null
+++ b/youtube_dlc/extractor/brightcove.py
@@ -0,0 +1,677 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import re
+import struct
+
+from .adobepass import AdobePassIE
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_fromstring,
+ compat_HTTPError,
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+ compat_xml_parse_error,
+)
+from ..utils import (
+ clean_html,
+ extract_attributes,
+ ExtractorError,
+ find_xpath_attr,
+ fix_xml_ampersands,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ parse_iso8601,
+ smuggle_url,
+ str_or_none,
+ unescapeHTML,
+ unsmuggle_url,
+ UnsupportedError,
+ update_url_query,
+ url_or_none,
+)
+
+
+class BrightcoveLegacyIE(InfoExtractor):
+ IE_NAME = 'brightcove:legacy'
+ _VALID_URL = r'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)'
+
+ _TESTS = [
+ {
+ # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
+ 'md5': '5423e113865d26e40624dce2e4b45d95',
+ 'note': 'Test Brightcove downloads and detection in GenericIE',
+ 'info_dict': {
+ 'id': '2371591881001',
+ 'ext': 'mp4',
+ 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
+ 'uploader': '8TV',
+ 'description': 'md5:a950cc4285c43e44d763d036710cd9cd',
+ 'timestamp': 1368213670,
+ 'upload_date': '20130510',
+ 'uploader_id': '1589608506001',
+ },
+ 'skip': 'The player has been deactivated by the content owner',
+ },
+ {
+ # From http://medianetwork.oracle.com/video/player/1785452137001
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
+ 'info_dict': {
+ 'id': '1785452137001',
+ 'ext': 'flv',
+ 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
+ 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.',
+ 'uploader': 'Oracle',
+ 'timestamp': 1344975024,
+ 'upload_date': '20120814',
+ 'uploader_id': '1460825906',
+ },
+ 'skip': 'video not playable',
+ },
+ {
+ # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/
+ 'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001',
+ 'info_dict': {
+ 'id': '2750934548001',
+ 'ext': 'mp4',
+ 'title': 'This Bracelet Acts as a Personal Thermostat',
+ 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0',
+ # 'uploader': 'Mashable',
+ 'timestamp': 1382041798,
+ 'upload_date': '20131017',
+ 'uploader_id': '1130468786001',
+ },
+ },
+ {
+ # test that the default referer works
+ # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/
+ 'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001',
+ 'info_dict': {
+ 'id': '2878862109001',
+ 'ext': 'mp4',
+ 'title': 'Lost in Motion II',
+ 'description': 'md5:363109c02998fee92ec02211bd8000df',
+ 'uploader': 'National Ballet of Canada',
+ },
+ 'skip': 'Video gone',
+ },
+ {
+ # test flv videos served by akamaihd.net
+ # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3Aevent-stream-356&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D',
+ # The md5 checksum changes on each download
+ 'info_dict': {
+ 'id': '3750436379001',
+ 'ext': 'flv',
+ 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
+ 'uploader': 'RBTV Old (do not use)',
+ 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals',
+ 'timestamp': 1409122195,
+ 'upload_date': '20140827',
+ 'uploader_id': '710858724001',
+ },
+ 'skip': 'Video gone',
+ },
+ {
+ # playlist with 'videoList'
+ # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players
+ 'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL',
+ 'info_dict': {
+ 'title': 'Sealife',
+ 'id': '3550319591001',
+ },
+ 'playlist_mincount': 7,
+ 'skip': 'Unsupported URL',
+ },
+ {
+ # playlist with 'playlistTab' (https://github.com/ytdl-org/youtube-dl/issues/9965)
+ 'url': 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=AQ%7E%7E,AAABXlLMdok%7E,NJ4EoMlZ4rZdx9eU1rkMVd8EaYPBBUlg',
+ 'info_dict': {
+ 'id': '1522758701001',
+ 'title': 'Lesson 08',
+ },
+ 'playlist_mincount': 10,
+ 'skip': 'Unsupported URL',
+ },
+ {
+ # playerID inferred from bcpid
+ # from http://www.un.org/chinese/News/story.asp?NewsID=27724
+ 'url': 'https://link.brightcove.com/services/player/bcpid1722935254001/?bctid=5360463607001&autoStart=false&secureConnections=true&width=650&height=350',
+ 'only_matching': True, # Tested in GenericIE
+ }
+ ]
+
+ @classmethod
+ def _build_brighcove_url(cls, object_str):
+ """
+ Build a Brightcove URL from an XML string containing
+ <object class="BrightcoveExperience">{params}</object>
+ """
+
+ # Fix up some stupid HTML, see https://github.com/ytdl-org/youtube-dl/issues/1553
+ object_str = re.sub(r'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>',
+ lambda m: m.group(1) + '/>', object_str)
+ # Fix up some stupid XML, see https://github.com/ytdl-org/youtube-dl/issues/1608
+ object_str = object_str.replace('<--', '<!--')
+ # remove namespace to simplify extraction
+ object_str = re.sub(r'(<object[^>]*)(xmlns=".*?")', r'\1', object_str)
+ object_str = fix_xml_ampersands(object_str)
+
+ try:
+ object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
+ except compat_xml_parse_error:
+ return
+
+ fv_el = find_xpath_attr(object_doc, './param', 'name', 'flashVars')
+ if fv_el is not None:
+ flashvars = dict(
+ (k, v[0])
+ for k, v in compat_parse_qs(fv_el.attrib['value']).items())
+ else:
+ flashvars = {}
+
+ data_url = object_doc.attrib.get('data', '')
+ data_url_params = compat_parse_qs(compat_urllib_parse_urlparse(data_url).query)
+
+ def find_param(name):
+ if name in flashvars:
+ return flashvars[name]
+ node = find_xpath_attr(object_doc, './param', 'name', name)
+ if node is not None:
+ return node.attrib['value']
+ return data_url_params.get(name)
+
+ params = {}
+
+ playerID = find_param('playerID') or find_param('playerId')
+ if playerID is None:
+ raise ExtractorError('Cannot find player ID')
+ params['playerID'] = playerID
+
+ playerKey = find_param('playerKey')
+ # Not all pages define this value
+ if playerKey is not None:
+ params['playerKey'] = playerKey
+ # These fields hold the id of the video
+ videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList')
+ if videoPlayer is not None:
+ if isinstance(videoPlayer, list):
+ videoPlayer = videoPlayer[0]
+ videoPlayer = videoPlayer.strip()
+ # UUID is also possible for videoPlayer (e.g.
+ # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd
+ # or http://www8.hp.com/cn/zh/home.html)
+ if not (re.match(
+ r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$',
+ videoPlayer) or videoPlayer.startswith('ref:')):
+ return None
+ params['@videoPlayer'] = videoPlayer
+ linkBase = find_param('linkBaseURL')
+ if linkBase is not None:
+ params['linkBaseURL'] = linkBase
+ return cls._make_brightcove_url(params)
+
+ @classmethod
+ def _build_brighcove_url_from_js(cls, object_js):
+ # The layout of JS is as follows:
+ # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) {
+ # // build Brightcove <object /> XML
+ # }
+ m = re.search(
+ r'''(?x)customBC\.createVideo\(
+ .*? # skipping width and height
+ ["\'](?P<playerID>\d+)["\']\s*,\s* # playerID
+ ["\'](?P<playerKey>AQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters
+ # in length, however it's appended to itself
+ # in places, so truncate
+ ["\'](?P<videoID>\d+)["\'] # @videoPlayer
+ ''', object_js)
+ if m:
+ return cls._make_brightcove_url(m.groupdict())
+
+ @classmethod
+ def _make_brightcove_url(cls, params):
+ return update_url_query(
+ 'http://c.brightcove.com/services/viewer/htmlFederated', params)
+
+ @classmethod
+ def _extract_brightcove_url(cls, webpage):
+ """Try to extract the brightcove url from the webpage, returns None
+ if it can't be found
+ """
+ urls = cls._extract_brightcove_urls(webpage)
+ return urls[0] if urls else None
+
+ @classmethod
+ def _extract_brightcove_urls(cls, webpage):
+ """Return a list of all Brightcove URLs from the webpage """
+
+ url_m = re.search(
+ r'''(?x)
+ <meta\s+
+ (?:property|itemprop)=([\'"])(?:og:video|embedURL)\1[^>]+
+ content=([\'"])(?P<url>https?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2
+ ''', webpage)
+ if url_m:
+ url = unescapeHTML(url_m.group('url'))
+ # Some sites don't add it; we can't download with such a URL, for example:
+ # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/
+ if 'playerKey' in url or 'videoId' in url or 'idVideo' in url:
+ return [url]
+
+ matches = re.findall(
+ r'''(?sx)<object
+ (?:
+ [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
+ [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
+ ).+?>\s*</object>''',
+ webpage)
+ if matches:
+ return list(filter(None, [cls._build_brighcove_url(m) for m in matches]))
+
+ matches = re.findall(r'(customBC\.createVideo\(.+?\);)', webpage)
+ if matches:
+ return list(filter(None, [
+ cls._build_brighcove_url_from_js(custom_bc)
+ for custom_bc in matches]))
+ return [src for _, src in re.findall(
+ r'<iframe[^>]+src=([\'"])((?:https?:)?//link\.brightcove\.com/services/player/(?!\1).+)\1', webpage)]
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ # Change 'videoId' and other fields to '@videoPlayer'
+ url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url)
+ # Change bckey (used by bcove.me urls) to playerKey
+ url = re.sub(r'(?<=[?&])bckey', 'playerKey', url)
+ mobj = re.match(self._VALID_URL, url)
+ query_str = mobj.group('query')
+ query = compat_urlparse.parse_qs(query_str)
+
+ videoPlayer = query.get('@videoPlayer')
+ if videoPlayer:
+ # We set the original url as the default 'Referer' header
+ referer = query.get('linkBaseURL', [None])[0] or smuggled_data.get('Referer', url)
+ video_id = videoPlayer[0]
+ if 'playerID' not in query:
+ mobj = re.search(r'/bcpid(\d+)', url)
+ if mobj is not None:
+ query['playerID'] = [mobj.group(1)]
+ publisher_id = query.get('publisherId')
+ if publisher_id and publisher_id[0].isdigit():
+ publisher_id = publisher_id[0]
+ if not publisher_id:
+ player_key = query.get('playerKey')
+ if player_key and ',' in player_key[0]:
+ player_key = player_key[0]
+ else:
+ player_id = query.get('playerID')
+ if player_id and player_id[0].isdigit():
+ headers = {}
+ if referer:
+ headers['Referer'] = referer
+ player_page = self._download_webpage(
+ 'http://link.brightcove.com/services/player/bcpid' + player_id[0],
+ video_id, headers=headers, fatal=False)
+ if player_page:
+ player_key = self._search_regex(
+ r'<param\s+name="playerKey"\s+value="([\w~,-]+)"',
+ player_page, 'player key', fatal=False)
+ if player_key:
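+ # The second comma-separated field of playerKey (with '~' mapped
+ # back to '='), e.g. 'AAAApYJi_Ck~' in 'AQ~~,AAAApYJi_Ck~,...', is
+ # a URL-safe base64-encoded big-endian 64-bit publisher id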
+ enc_pub_id = player_key.split(',')[1].replace('~', '=')
+ publisher_id = struct.unpack('>Q', base64.urlsafe_b64decode(enc_pub_id))[0]
+ if publisher_id:
+ brightcove_new_url = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' % (publisher_id, video_id)
+ if referer:
+ brightcove_new_url = smuggle_url(brightcove_new_url, {'referrer': referer})
+ return self.url_result(brightcove_new_url, BrightcoveNewIE.ie_key(), video_id)
+ # TODO: figure out if it's possible to extract playlistId from playerKey
+ # elif 'playerKey' in query:
+ # player_key = query['playerKey']
+ # return self._get_playlist_info(player_key[0])
+ raise UnsupportedError(url)
+
+
+class BrightcoveNewIE(AdobePassIE):
+ IE_NAME = 'brightcove:new'
+ _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*(?P<content_type>video|playlist)Id=(?P<video_id>\d+|ref:[^&]+)'
+ _TESTS = [{
+ 'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
+ 'md5': 'c8100925723840d4b0d243f7025703be',
+ 'info_dict': {
+ 'id': '4463358922001',
+ 'ext': 'mp4',
+ 'title': 'Meet the man behind Popcorn Time',
+ 'description': 'md5:eac376a4fe366edc70279bfb681aea16',
+ 'duration': 165.768,
+ 'timestamp': 1441391203,
+ 'upload_date': '20150904',
+ 'uploader_id': '929656772001',
+ 'formats': 'mincount:20',
+ },
+ }, {
+ # with rtmp streams
+ 'url': 'http://players.brightcove.net/4036320279001/5d112ed9-283f-485f-a7f9-33f42e8bc042_default/index.html?videoId=4279049078001',
+ 'info_dict': {
+ 'id': '4279049078001',
+ 'ext': 'mp4',
+ 'title': 'Titansgrave: Chapter 0',
+ 'description': 'Titansgrave: Chapter 0',
+ 'duration': 1242.058,
+ 'timestamp': 1433556729,
+ 'upload_date': '20150606',
+ 'uploader_id': '4036320279001',
+ 'formats': 'mincount:39',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # playlist stream
+ 'url': 'https://players.brightcove.net/1752604059001/S13cJdUBz_default/index.html?playlistId=5718313430001',
+ 'info_dict': {
+ 'id': '5718313430001',
+ 'title': 'No Audio Playlist',
+ },
+ 'playlist_count': 7,
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=5743160747001',
+ 'only_matching': True,
+ }, {
+ # ref: prefixed video id
+ 'url': 'http://players.brightcove.net/3910869709001/21519b5c-4b3b-4363-accb-bdc8f358f823_default/index.html?videoId=ref:7069442',
+ 'only_matching': True,
+ }, {
+ # non numeric ref: prefixed video id
+ 'url': 'http://players.brightcove.net/710858724001/default_default/index.html?videoId=ref:event-stream-356',
+ 'only_matching': True,
+ }, {
+ # unavailable video without message but with error_code
+ 'url': 'http://players.brightcove.net/1305187701/c832abfb-641b-44eb-9da0-2fe76786505f_default/index.html?videoId=4377407326001',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(ie, webpage):
+ urls = BrightcoveNewIE._extract_urls(ie, webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(ie, webpage):
+ # Reference:
+ # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
+ # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#tag
+ # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
+ # 4. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/in-page-embed-player-implementation.html
+ # 5. https://support.brightcove.com/en/video-cloud/docs/dynamically-assigning-videos-player
+
+ entries = []
+
+ # Look for iframe embeds [1]
+ for _, url in re.findall(
+ r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
+ entries.append(url if url.startswith('http') else 'http:' + url)
+
+ # Look for <video> tags [2] and embed_in_page embeds [3]
+ # [2] looks like:
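+ # (illustrative only; attribute values below are made up):
+ # <video data-video-id="4463358922001" data-account="929656772001"
+ # data-player="default" data-embed="default" class="video-js" controls></video>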
+ for video, script_tag, account_id, player_id, embed in re.findall(
+ r'''(?isx)
+ (<video(?:-js)?\s+[^>]*\bdata-video-id\s*=\s*['"]?[^>]+>)
+ (?:.*?
+ (<script[^>]+
+ src=["\'](?:https?:)?//players\.brightcove\.net/
+ (\d+)/([^/]+)_([^/]+)/index(?:\.min)?\.js
+ )
+ )?
+ ''', webpage):
+ attrs = extract_attributes(video)
+
+ # According to examples from [4] it's unclear whether video id
+ # may be optional and what to do when it is
+ video_id = attrs.get('data-video-id')
+ if not video_id:
+ continue
+
+ account_id = account_id or attrs.get('data-account')
+ if not account_id:
+ continue
+
+ player_id = player_id or attrs.get('data-player') or 'default'
+ embed = embed or attrs.get('data-embed') or 'default'
+
+ bc_url = 'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s' % (
+ account_id, player_id, embed, video_id)
+
+ # Some brightcove videos may be embedded with video tag only and
+ # without script tag or any mentioning of brightcove at all. Such
+ # embeds are considered ambiguous since they are matched based only
+ # on data-video-id and data-account attributes and in the wild may
+ # not be brightcove embeds at all. Let's check reconstructed
+ # brightcove URLs in case of such embeds and only process valid
+ # ones. By this we ensure there is indeed a brightcove embed.
+ if not script_tag and not ie._is_valid_url(
+ bc_url, video_id, 'possible brightcove video'):
+ continue
+
+ entries.append(bc_url)
+
+ return entries
+
+ def _parse_brightcove_metadata(self, json_data, video_id, headers={}):
+ title = json_data['name'].strip()
+
+ formats = []
+ for source in json_data.get('sources', []):
+ container = source.get('container')
+ ext = mimetype2ext(source.get('type'))
+ src = source.get('src')
+ # https://support.brightcove.com/playback-api-video-fields-reference#key_systems_object
+ if ext == 'ism' or container == 'WVM' or source.get('key_systems'):
+ continue
+ elif ext == 'm3u8' or container == 'M2TS':
+ if not src:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ if not src:
+ continue
+ formats.extend(self._extract_mpd_formats(src, video_id, 'dash', fatal=False))
+ else:
+ streaming_src = source.get('streaming_src')
+ stream_name, app_name = source.get('stream_name'), source.get('app_name')
+ if not src and not streaming_src and (not stream_name or not app_name):
+ continue
+ tbr = float_or_none(source.get('avg_bitrate'), 1000)
+ height = int_or_none(source.get('height'))
+ width = int_or_none(source.get('width'))
+ f = {
+ 'tbr': tbr,
+ 'filesize': int_or_none(source.get('size')),
+ 'container': container,
+                    'ext': ext or (container.lower() if container else None),
+ }
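+                # A 0x0 source is treated as an audio-only rendition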
+ if width == 0 and height == 0:
+ f.update({
+ 'vcodec': 'none',
+ })
+ else:
+ f.update({
+ 'width': width,
+ 'height': height,
+ 'vcodec': source.get('codec'),
+ })
+
+ def build_format_id(kind):
+ format_id = kind
+ if tbr:
+ format_id += '-%dk' % int(tbr)
+ if height:
+ format_id += '-%dp' % height
+ return format_id
+
+ if src or streaming_src:
+ f.update({
+ 'url': src or streaming_src,
+ 'format_id': build_format_id('http' if src else 'http-streaming'),
+ 'source_preference': 0 if src else -1,
+ })
+ else:
+ f.update({
+ 'url': app_name,
+ 'play_path': stream_name,
+ 'format_id': build_format_id('rtmp'),
+ })
+ formats.append(f)
+ if not formats:
+            # for sonyliv.com DRM-protected videos
+ s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
+ if s3_source_url:
+ formats.append({
+ 'url': s3_source_url,
+ 'format_id': 'source',
+ })
+
+ errors = json_data.get('errors')
+ if not formats and errors:
+ error = errors[0]
+ raise ExtractorError(
+ error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
+
+ self._sort_formats(formats)
+
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
+ subtitles = {}
+ for text_track in json_data.get('text_tracks', []):
+ if text_track.get('kind') != 'captions':
+ continue
+ text_track_url = url_or_none(text_track.get('src'))
+ if not text_track_url:
+ continue
+ lang = (str_or_none(text_track.get('srclang'))
+ or str_or_none(text_track.get('label')) or 'en').lower()
+ subtitles.setdefault(lang, []).append({
+ 'url': text_track_url,
+ })
+
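+        # duration is reported in milliseconds; a non-positive value is
+        # treated as a live stream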
+ is_live = False
+ duration = float_or_none(json_data.get('duration'), 1000)
+ if duration is not None and duration <= 0:
+ is_live = True
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': clean_html(json_data.get('description')),
+ 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'),
+ 'duration': duration,
+ 'timestamp': parse_iso8601(json_data.get('published_at')),
+ 'uploader_id': json_data.get('account_id'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'tags': json_data.get('tags', []),
+ 'is_live': is_live,
+ }
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
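+        # Embedding extractors may smuggle in geo-bypass hints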
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ 'ip_blocks': smuggled_data.get('geo_ip_blocks'),
+ })
+
+ account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups()
+
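+        # The Playback API requires a policy key, which is scraped from the
+        # player JS and cached per (account, player) pair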
+ policy_key_id = '%s_%s' % (account_id, player_id)
+ policy_key = self._downloader.cache.load('brightcove', policy_key_id)
+ policy_key_extracted = False
+ store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x)
+
+ def extract_policy_key():
+ webpage = self._download_webpage(
+ 'http://players.brightcove.net/%s/%s_%s/index.min.js'
+ % (account_id, player_id, embed), video_id)
+
+ policy_key = None
+
+ catalog = self._search_regex(
+ r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
+ if catalog:
+ catalog = self._parse_json(
+ js_to_json(catalog), video_id, fatal=False)
+ if catalog:
+ policy_key = catalog.get('policyKey')
+
+ if not policy_key:
+ policy_key = self._search_regex(
+ r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
+ webpage, 'policy key', group='pk')
+
+ store_pk(policy_key)
+ return policy_key
+
+ api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
+ headers = {}
+ referrer = smuggled_data.get('referrer')
+ if referrer:
+ headers.update({
+ 'Referer': referrer,
+ 'Origin': re.search(r'https?://[^/]+', referrer).group(0),
+ })
+
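+        # At most two attempts: if a cached policy key is rejected with
+        # INVALID_POLICY_KEY, drop it, re-extract it from the player JS and
+        # retry once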
+ for _ in range(2):
+ if not policy_key:
+ policy_key = extract_policy_key()
+ policy_key_extracted = True
+ headers['Accept'] = 'application/json;pk=%s' % policy_key
+ try:
+ json_data = self._download_json(api_url, video_id, headers=headers)
+ break
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
+ json_data = self._parse_json(e.cause.read().decode(), video_id)[0]
+ message = json_data.get('message') or json_data['error_code']
+ if json_data.get('error_subcode') == 'CLIENT_GEO':
+ self.raise_geo_restricted(msg=message)
+ elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted:
+ policy_key = None
+ store_pk(None)
+ continue
+ raise ExtractorError(message, expected=True)
+ raise
+
+ errors = json_data.get('errors')
+ if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
+ custom_fields = json_data['custom_fields']
+ tve_token = self._extract_mvpd_auth(
+ smuggled_data['source_url'], video_id,
+ custom_fields['bcadobepassrequestorid'],
+ custom_fields['bcadobepassresourceid'])
+ json_data = self._download_json(
+ api_url, video_id, headers={
+ 'Accept': 'application/json;pk=%s' % policy_key
+ }, query={
+ 'tveToken': tve_token,
+ })
+
+ if content_type == 'playlist':
+ return self.playlist_result(
+ [self._parse_brightcove_metadata(vid, vid.get('id'), headers)
+ for vid in json_data.get('videos', []) if vid.get('id')],
+ json_data.get('id'), json_data.get('name'),
+ json_data.get('description'))
+
+ return self._parse_brightcove_metadata(
+ json_data, video_id, headers=headers)
diff --git a/youtube_dlc/extractor/businessinsider.py b/youtube_dlc/extractor/businessinsider.py
new file mode 100644
index 000000000..73a57b1e4
--- /dev/null
+++ b/youtube_dlc/extractor/businessinsider.py
@@ -0,0 +1,48 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+
+
+class BusinessInsiderIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
+ 'md5': 'ffed3e1e12a6f950aa2f7d83851b497a',
+ 'info_dict': {
+ 'id': 'cjGDb0X9',
+ 'ext': 'mp4',
+        'title': 'Bananas give you more radiation exposure than living next to a nuclear power plant',
+ 'description': 'md5:0175a3baf200dd8fa658f94cade841b3',
+ 'upload_date': '20160611',
+ 'timestamp': 1465675620,
+ },
+ }, {
+ 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
+ 'md5': '43f438dbc6da0b89f5ac42f68529d84a',
+ 'info_dict': {
+ 'id': '5zJwd4FK',
+ 'ext': 'mp4',
+ 'title': 'Deze dingen zorgen ervoor dat je minder snel een date scoort',
+ 'description': 'md5:2af8975825d38a4fed24717bbe51db49',
+ 'upload_date': '20170705',
+ 'timestamp': 1499270528,
+ },
+ }, {
+ 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
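+        # JW Platform media ids are 8 alphanumeric characters; try several
+        # page patterns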
+ jwplatform_id = self._search_regex(
+ (r'data-media-id=["\']([a-zA-Z0-9]{8})',
+ r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
+ r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})',
+ r'(?:jwplatform\.com/players/|jwplayer_)([a-zA-Z0-9]{8})'),
+ webpage, 'jwplatform id')
+ return self.url_result(
+ 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
+ video_id=video_id)
diff --git a/youtube_dl/extractor/buzzfeed.py b/youtube_dlc/extractor/buzzfeed.py
index ec411091e..ec411091e 100644
--- a/youtube_dl/extractor/buzzfeed.py
+++ b/youtube_dlc/extractor/buzzfeed.py
diff --git a/youtube_dl/extractor/byutv.py b/youtube_dlc/extractor/byutv.py
index 0b11bf11f..0b11bf11f 100644
--- a/youtube_dl/extractor/byutv.py
+++ b/youtube_dlc/extractor/byutv.py
diff --git a/youtube_dl/extractor/c56.py b/youtube_dlc/extractor/c56.py
index cac8fdcba..cac8fdcba 100644
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dlc/extractor/c56.py
diff --git a/youtube_dl/extractor/camdemy.py b/youtube_dlc/extractor/camdemy.py
index 8f0c6c545..8f0c6c545 100644
--- a/youtube_dl/extractor/camdemy.py
+++ b/youtube_dlc/extractor/camdemy.py
diff --git a/youtube_dl/extractor/cammodels.py b/youtube_dlc/extractor/cammodels.py
index 1eb81b75e..1eb81b75e 100644
--- a/youtube_dl/extractor/cammodels.py
+++ b/youtube_dlc/extractor/cammodels.py
diff --git a/youtube_dl/extractor/camtube.py b/youtube_dlc/extractor/camtube.py
index b3be3bdcf..b3be3bdcf 100644
--- a/youtube_dl/extractor/camtube.py
+++ b/youtube_dlc/extractor/camtube.py
diff --git a/youtube_dl/extractor/camwithher.py b/youtube_dlc/extractor/camwithher.py
index bbc5205fd..bbc5205fd 100644
--- a/youtube_dl/extractor/camwithher.py
+++ b/youtube_dlc/extractor/camwithher.py
diff --git a/youtube_dl/extractor/canalc2.py b/youtube_dlc/extractor/canalc2.py
index 407cc8084..407cc8084 100644
--- a/youtube_dl/extractor/canalc2.py
+++ b/youtube_dlc/extractor/canalc2.py
diff --git a/youtube_dl/extractor/canalplus.py b/youtube_dlc/extractor/canalplus.py
index 51c11cb7e..51c11cb7e 100644
--- a/youtube_dl/extractor/canalplus.py
+++ b/youtube_dlc/extractor/canalplus.py
diff --git a/youtube_dlc/extractor/canvas.py b/youtube_dlc/extractor/canvas.py
new file mode 100644
index 000000000..8667a0d04
--- /dev/null
+++ b/youtube_dlc/extractor/canvas.py
@@ -0,0 +1,368 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from .gigya import GigyaBaseIE
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ strip_or_none,
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ parse_iso8601,
+ str_or_none,
+ url_or_none,
+)
+
+
+class CanvasIE(InfoExtractor):
+ _VALID_URL = r'https?://mediazone\.vrt\.be/api/v1/(?P<site_id>canvas|een|ketnet|vrt(?:video|nieuws)|sporza)/assets/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'md5': '68993eda72ef62386a15ea2cf3c93107',
+ 'info_dict': {
+ 'id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'display_id': 'md-ast-4ac54990-ce66-4d00-a8ca-9eac86f4c475',
+ 'ext': 'mp4',
+ 'title': 'Nachtwacht: De Greystook',
+ 'description': 'Nachtwacht: De Greystook',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1468.04,
+ },
+ 'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
+ }, {
+ 'url': 'https://mediazone.vrt.be/api/v1/canvas/assets/mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
+ 'only_matching': True,
+ }]
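+    # Plain HLS can use the native downloader; AES-encrypted HLS is routed
+    # to the generic m3u8 (ffmpeg) downloader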
+ _HLS_ENTRY_PROTOCOLS_MAP = {
+ 'HLS': 'm3u8_native',
+ 'HLS_AES': 'm3u8',
+ }
+ _REST_API_BASE = 'https://media-services-public.vrt.be/vualto-video-aggregator-web/rest/external/v1'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site_id, video_id = mobj.group('site_id'), mobj.group('id')
+
+ # Old API endpoint, serves more formats but may fail for some videos
+ data = self._download_json(
+ 'https://mediazone.vrt.be/api/v1/%s/assets/%s'
+ % (site_id, video_id), video_id, 'Downloading asset JSON',
+ 'Unable to download asset JSON', fatal=False)
+
+ # New API endpoint
+ if not data:
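+            # POST an empty body to obtain a fresh vrtPlayerToken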
+ token = self._download_json(
+ '%s/tokens' % self._REST_API_BASE, video_id,
+ 'Downloading token', data=b'',
+ headers={'Content-Type': 'application/json'})['vrtPlayerToken']
+ data = self._download_json(
+ '%s/videos/%s' % (self._REST_API_BASE, video_id),
+ video_id, 'Downloading video JSON', fatal=False, query={
+ 'vrtPlayerToken': token,
+ 'client': '%s@PROD' % site_id,
+ }, expected_status=400)
+ message = data.get('message')
+ if message and not data.get('title'):
+ if data.get('code') == 'AUTHENTICATION_REQUIRED':
+ self.raise_login_required(message)
+ raise ExtractorError(message, expected=True)
+
+ title = data['title']
+ description = data.get('description')
+
+ formats = []
+ for target in data['targetUrls']:
+ format_url, format_type = url_or_none(target.get('url')), str_or_none(target.get('type'))
+ if not format_url or not format_type:
+ continue
+ format_type = format_type.upper()
+ if format_type in self._HLS_ENTRY_PROTOCOLS_MAP:
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', self._HLS_ENTRY_PROTOCOLS_MAP[format_type],
+ m3u8_id=format_type, fatal=False))
+ elif format_type == 'HDS':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_type, fatal=False))
+ elif format_type == 'MPEG_DASH':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id=format_type, fatal=False))
+ elif format_type == 'HSS':
+ formats.extend(self._extract_ism_formats(
+ format_url, video_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_type,
+ 'url': format_url,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitle_urls = data.get('subtitleUrls')
+ if isinstance(subtitle_urls, list):
+ for subtitle in subtitle_urls:
+ subtitle_url = subtitle.get('url')
+ if subtitle_url and subtitle.get('type') == 'CLOSED':
+ subtitles.setdefault('nl', []).append({'url': subtitle_url})
+
+ return {
+ 'id': video_id,
+ 'display_id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': formats,
+ 'duration': float_or_none(data.get('duration'), 1000),
+ 'thumbnail': data.get('posterImageUrl'),
+ 'subtitles': subtitles,
+ }
+
+
+class CanvasEenIE(InfoExtractor):
+ IE_DESC = 'canvas.be and een.be'
+ _VALID_URL = r'https?://(?:www\.)?(?P<site_id>canvas|een)\.be/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://www.canvas.be/video/de-afspraak/najaar-2015/de-afspraak-veilt-voor-de-warmste-week',
+ 'md5': 'ed66976748d12350b118455979cca293',
+ 'info_dict': {
+ 'id': 'mz-ast-5e5f90b6-2d72-4c40-82c2-e134f884e93e',
+ 'display_id': 'de-afspraak-veilt-voor-de-warmste-week',
+ 'ext': 'flv',
+ 'title': 'De afspraak veilt voor de Warmste Week',
+ 'description': 'md5:24cb860c320dc2be7358e0e5aa317ba6',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 49.02,
+ },
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # with subtitles
+ 'url': 'http://www.canvas.be/video/panorama/2016/pieter-0167',
+ 'info_dict': {
+ 'id': 'mz-ast-5240ff21-2d30-4101-bba6-92b5ec67c625',
+ 'display_id': 'pieter-0167',
+ 'ext': 'mp4',
+ 'title': 'Pieter 0167',
+ 'description': 'md5:943cd30f48a5d29ba02c3a104dc4ec4e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2553.08,
+ 'subtitles': {
+ 'nl': [{
+ 'ext': 'vtt',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Pagina niet gevonden',
+ }, {
+ 'url': 'https://www.een.be/thuis/emma-pakt-thilly-aan',
+ 'info_dict': {
+ 'id': 'md-ast-3a24ced2-64d7-44fb-b4ed-ed1aafbf90b8',
+ 'display_id': 'emma-pakt-thilly-aan',
+ 'ext': 'mp4',
+ 'title': 'Emma pakt Thilly aan',
+ 'description': 'md5:c5c9b572388a99b2690030afa3f3bad7',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 118.24,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ 'url': 'https://www.canvas.be/check-point/najaar-2016/de-politie-uw-vriend',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site_id, display_id = mobj.group('site_id'), mobj.group('id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = strip_or_none(self._search_regex(
+ r'<h1[^>]+class="video__body__header__title"[^>]*>(.+?)</h1>',
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None))
+
+ video_id = self._html_search_regex(
+ r'data-video=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id',
+ group='id')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (site_id, video_id),
+ 'ie_key': CanvasIE.ie_key(),
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': self._og_search_description(webpage),
+ }
+
+
+class VrtNUIE(GigyaBaseIE):
+ IE_DESC = 'VrtNU.be'
+ _VALID_URL = r'https?://(?:www\.)?vrt\.be/(?P<site_id>vrtnu)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # Available via old API endpoint
+ 'url': 'https://www.vrt.be/vrtnu/a-z/postbus-x/1/postbus-x-s1a1/',
+ 'info_dict': {
+ 'id': 'pbs-pub-2e2d8c27-df26-45c9-9dc6-90c78153044d$vid-90c932b1-e21d-4fb8-99b1-db7b49cf74de',
+ 'ext': 'mp4',
+ 'title': 'De zwarte weduwe',
+ 'description': 'md5:db1227b0f318c849ba5eab1fef895ee4',
+ 'duration': 1457.04,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'skip': 'This video is only available for registered users',
+ 'params': {
+ 'username': '<snip>',
+ 'password': '<snip>',
+ },
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # Only available via new API endpoint
+ 'url': 'https://www.vrt.be/vrtnu/a-z/kamp-waes/1/kamp-waes-s1a5/',
+ 'info_dict': {
+ 'id': 'pbs-pub-0763b56c-64fb-4d38-b95b-af60bf433c71$vid-ad36a73c-4735-4f1f-b2c0-a38e6e6aa7e1',
+ 'ext': 'mp4',
+ 'title': 'Aflevering 5',
+ 'description': 'Wie valt door de mand tijdens een missie?',
+ 'duration': 2967.06,
+ 'season': 'Season 1',
+ 'season_number': 1,
+ 'episode_number': 5,
+ },
+ 'skip': 'This video is only available for registered users',
+ 'params': {
+ 'username': '<snip>',
+ 'password': '<snip>',
+ },
+ 'expected_warnings': ['Unable to download asset JSON', 'is not a supported codec', 'Unknown MIME type'],
+ }]
+ _NETRC_MACHINE = 'vrtnu'
+ _APIKEY = '3_0Z2HujMtiWq_pkAjgnS2Md2E11a1AwZjYiBETtwNE-EoEHDINgtnvcAOpNgmrVGy'
+ _CONTEXT_ID = 'R3595707040'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ auth_data = {
+ 'APIKey': self._APIKEY,
+ 'targetEnv': 'jssdk',
+ 'loginID': username,
+ 'password': password,
+ 'authMode': 'cookie',
+ }
+
+ auth_info = self._gigya_login(auth_data)
+
+        # Authentication sometimes fails for no apparent reason; try up to
+        # three times
+ login_attempt = 1
+ while login_attempt <= 3:
+ try:
+ # When requesting a token, no actual token is returned, but the
+ # necessary cookies are set.
+ self._request_webpage(
+ 'https://token.vrt.be',
+ None, note='Requesting a token', errnote='Could not get a token',
+ headers={
+ 'Content-Type': 'application/json',
+ 'Referer': 'https://www.vrt.be/vrtnu/',
+ },
+ data=json.dumps({
+ 'uid': auth_info['UID'],
+ 'uidsig': auth_info['UIDSignature'],
+ 'ts': auth_info['signatureTimestamp'],
+ 'email': auth_info['profile']['email'],
+ }).encode('utf-8'))
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ login_attempt += 1
+ self.report_warning('Authentication failed')
+ self._sleep(1, None, msg_template='Waiting for %(timeout)s seconds before trying again')
+ else:
+ raise e
+ else:
+ break
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+
+ info = self._search_json_ld(webpage, display_id, default={})
+
+        # title is optional here since it may be extracted by the extractor
+        # this request is delegated to
+ title = strip_or_none(self._html_search_regex(
+ r'(?ms)<h1 class="content__heading">(.+?)</h1>',
+ webpage, 'title', default=None))
+
+ description = self._html_search_regex(
+ r'(?ms)<div class="content__description">(.+?)</div>',
+ webpage, 'description', default=None)
+
+ season = self._html_search_regex(
+ [r'''(?xms)<div\ class="tabs__tab\ tabs__tab--active">\s*
+ <span>seizoen\ (.+?)</span>\s*
+ </div>''',
+ r'<option value="seizoen (\d{1,3})" data-href="[^"]+?" selected>'],
+ webpage, 'season', default=None)
+
+ season_number = int_or_none(season)
+
+ episode_number = int_or_none(self._html_search_regex(
+ r'''(?xms)<div\ class="content__episode">\s*
+ <abbr\ title="aflevering">afl</abbr>\s*<span>(\d+)</span>
+ </div>''',
+ webpage, 'episode_number', default=None))
+
+ release_date = parse_iso8601(self._html_search_regex(
+ r'(?ms)<div class="content__broadcastdate">\s*<time\ datetime="(.+?)"',
+ webpage, 'release_date', default=None))
+
+ # If there's a ? or a # in the URL, remove them and everything after
+ clean_url = urlh.geturl().split('?')[0].split('#')[0].strip('/')
+ securevideo_url = clean_url + '.mssecurevideo.json'
+
+ try:
+ video = self._download_json(securevideo_url, display_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ self.raise_login_required()
+ raise
+
+ # We are dealing with a '../<show>.relevant' URL
+ redirect_url = video.get('url')
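+        # URLs containing '_Source.' outrank every named quality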
+ if redirect_url:
+ return self.url_result(self._proto_relative_url(redirect_url, 'https:'))
+
+ # There is only one entry, but with an unknown key, so just get
+ # the first one
+ video_id = list(video.values())[0].get('videoid')
+
+ return merge_dicts(info, {
+ '_type': 'url_transparent',
+ 'url': 'https://mediazone.vrt.be/api/v1/vrtvideo/assets/%s' % video_id,
+ 'ie_key': CanvasIE.ie_key(),
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'release_date': release_date,
+ })
diff --git a/youtube_dl/extractor/carambatv.py b/youtube_dlc/extractor/carambatv.py
index b57b86af7..b57b86af7 100644
--- a/youtube_dl/extractor/carambatv.py
+++ b/youtube_dlc/extractor/carambatv.py
diff --git a/youtube_dl/extractor/cartoonnetwork.py b/youtube_dlc/extractor/cartoonnetwork.py
index 48b33617f..48b33617f 100644
--- a/youtube_dl/extractor/cartoonnetwork.py
+++ b/youtube_dlc/extractor/cartoonnetwork.py
diff --git a/youtube_dlc/extractor/cbc.py b/youtube_dlc/extractor/cbc.py
new file mode 100644
index 000000000..fd5ec6033
--- /dev/null
+++ b/youtube_dlc/extractor/cbc.py
@@ -0,0 +1,497 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import json
+import re
+from xml.sax.saxutils import escape
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_HTTPError,
+)
+from ..utils import (
+ js_to_json,
+ smuggle_url,
+ try_get,
+ xpath_text,
+ xpath_element,
+ xpath_with_ns,
+ find_xpath_attr,
+ orderedSet,
+ parse_duration,
+ parse_iso8601,
+ parse_age_limit,
+ strip_or_none,
+ int_or_none,
+ ExtractorError,
+)
+
+
+class CBCIE(InfoExtractor):
+ IE_NAME = 'cbc.ca'
+ _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # with mediaId
+ 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
+ 'md5': '97e24d09672fc4cf56256d6faa6c25bc',
+ 'info_dict': {
+ 'id': '2682904050',
+ 'ext': 'mp4',
+ 'title': 'Don Cherry – All-Stars',
+ 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
+ 'timestamp': 1454463000,
+ 'upload_date': '20160203',
+ 'uploader': 'CBCC-NEW',
+ },
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
+ 'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
+ 'md5': '162adfa070274b144f4fdc3c3b8207db',
+ 'info_dict': {
+ 'id': '2414435309',
+ 'ext': 'mp4',
+ 'title': '22 Minutes Update: What Not To Wear Quebec',
+ 'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
+ 'upload_date': '20131025',
+ 'uploader': 'CBCC-NEW',
+ 'timestamp': 1382717907,
+ },
+ }, {
+ # with clipId, feed only available via tpfeed.cbc.ca
+ 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
+ 'md5': '0274a90b51a9b4971fe005c63f592f12',
+ 'info_dict': {
+ 'id': '2487345465',
+ 'ext': 'mp4',
+ 'title': 'Robin Williams freestyles on 90 Minutes Live',
+ 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
+ 'upload_date': '19780210',
+ 'uploader': 'CBCC-NEW',
+ 'timestamp': 255977160,
+ },
+ }, {
+ # multiple iframes
+ 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
+ 'playlist': [{
+ 'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
+ 'info_dict': {
+ 'id': '2680832926',
+ 'ext': 'mp4',
+ 'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
+ 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
+ 'upload_date': '20160201',
+ 'timestamp': 1454342820,
+ 'uploader': 'CBCC-NEW',
+ },
+ }, {
+ 'md5': '415a0e3f586113894174dfb31aa5bb1a',
+ 'info_dict': {
+ 'id': '2658915080',
+ 'ext': 'mp4',
+ 'title': 'Fly like an eagle!',
+ 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
+ 'upload_date': '20150315',
+ 'timestamp': 1426443984,
+ 'uploader': 'CBCC-NEW',
+ },
+ }],
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # multiple CBC.APP.Caffeine.initInstance(...)
+ 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
+ 'info_dict': {
+ 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
+ 'id': 'dog-indoor-exercise-winter-1.3928238',
+ 'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
+ },
+ 'playlist_mincount': 6,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)
+
+ def _extract_player_init(self, player_init, display_id):
+ player_info = self._parse_json(player_init, display_id, js_to_json)
+ media_id = player_info.get('mediaId')
+ if not media_id:
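+            # No mediaId: resolve it from the clipId via tpfeed.cbc.ca,
+            # falling back to feed.theplatform.com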
+ clip_id = player_info['clipId']
+ feed = self._download_json(
+ 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
+ clip_id, fatal=False)
+ if feed:
+ media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
+ if not media_id:
+ media_id = self._download_json(
+ 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
+ clip_id)['entries'][0]['id'].split('/')[-1]
+ return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ title = self._og_search_title(webpage, default=None) or self._html_search_meta(
+ 'twitter:title', webpage, 'title', default=None) or self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
+ entries = [
+ self._extract_player_init(player_init, display_id)
+ for player_init in re.findall(r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]
+ media_ids = []
+ for media_id_re in (
+ r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
+ r'<div[^>]+\bid=["\']player-(\d+)',
+ r'guid["\']\s*:\s*["\'](\d+)'):
+ media_ids.extend(re.findall(media_id_re, webpage))
+ entries.extend([
+ self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
+ for media_id in orderedSet(media_ids)])
+ return self.playlist_result(
+ entries, display_id, strip_or_none(title),
+ self._og_search_description(webpage))
+
+
+class CBCPlayerIE(InfoExtractor):
+ IE_NAME = 'cbc.ca:player'
+ _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.cbc.ca/player/play/2683190193',
+ 'md5': '64d25f841ddf4ddb28a235338af32e2c',
+ 'info_dict': {
+ 'id': '2683190193',
+ 'ext': 'mp4',
+ 'title': 'Gerry Runs a Sweat Shop',
+ 'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
+ 'timestamp': 1455071400,
+ 'upload_date': '20160210',
+ 'uploader': 'CBCC-NEW',
+ },
+ 'skip': 'Geo-restricted to Canada',
+ }, {
+ # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
+ 'url': 'http://www.cbc.ca/player/play/2657631896',
+ 'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
+ 'info_dict': {
+ 'id': '2657631896',
+ 'ext': 'mp3',
+ 'title': 'CBC Montreal is organizing its first ever community hackathon!',
+ 'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
+ 'timestamp': 1425704400,
+ 'upload_date': '20150307',
+ 'uploader': 'CBCC-NEW',
+ },
+ }, {
+ 'url': 'http://www.cbc.ca/player/play/2164402062',
+ 'md5': '33fcd8f6719b9dd60a5e73adcb83b9f6',
+ 'info_dict': {
+ 'id': '2164402062',
+ 'ext': 'mp4',
+ 'title': 'Cancer survivor four times over',
+ 'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
+ 'timestamp': 1320410746,
+ 'upload_date': '20111104',
+ 'uploader': 'CBCC-NEW',
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': smuggle_url(
+ 'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3' % video_id, {
+ 'force_smil_url': True
+ }),
+ 'id': video_id,
+ }
+
+
+class CBCWatchBaseIE(InfoExtractor):
+ _device_id = None
+ _device_token = None
+ _API_BASE_URL = 'https://api-cbc.cloud.clearleap.com/cloffice/client/'
+ _NS_MAP = {
+ 'media': 'http://search.yahoo.com/mrss/',
+ 'clearleap': 'http://www.clearleap.com/namespace/clearleap/1.0/',
+ }
+ _GEO_COUNTRIES = ['CA']
+ _LOGIN_URL = 'https://api.loginradius.com/identity/v2/auth/login'
+ _TOKEN_URL = 'https://cloud-api.loginradius.com/sso/jwt/api/token'
+ _API_KEY = '3f4beddd-2061-49b0-ae80-6f1f2ed65b37'
+ _NETRC_MACHINE = 'cbcwatch'
+
+ def _signature(self, email, password):
+ data = json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode()
+ headers = {'content-type': 'application/json'}
+ query = {'apikey': self._API_KEY}
+ resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
+ access_token = resp['access_token']
+
+ # token
+ query = {
+ 'access_token': access_token,
+ 'apikey': self._API_KEY,
+ 'jwtapp': 'jwt',
+ }
+ resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
+ return resp['signature']
+
+ def _call_api(self, path, video_id):
+ url = path if path.startswith('http') else self._API_BASE_URL + path
+ for _ in range(2):
+ try:
+ result = self._download_xml(url, video_id, headers={
+ 'X-Clearleap-DeviceId': self._device_id,
+ 'X-Clearleap-DeviceToken': self._device_token,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                    # The device token has expired; re-acquire it and retry
+ self._register_device()
+ continue
+ raise
+ error_message = xpath_text(result, 'userMessage') or xpath_text(result, 'systemMessage')
+ if error_message:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message))
+ return result
+
+ def _real_initialize(self):
+ if self._valid_device_token():
+ return
+ device = self._downloader.cache.load(
+ 'cbcwatch', self._cache_device_key()) or {}
+ self._device_id, self._device_token = device.get('id'), device.get('token')
+ if self._valid_device_token():
+ return
+ self._register_device()
+
+ def _valid_device_token(self):
+ return self._device_id and self._device_token
+
+ def _cache_device_key(self):
+ email, _ = self._get_login_info()
+ return '%s_device' % hashlib.sha256(email.encode()).hexdigest() if email else 'device'
+
+ def _register_device(self):
+ result = self._download_xml(
+ self._API_BASE_URL + 'device/register',
+ None, 'Acquiring device token',
+ data=b'<device><type>web</type></device>')
+ self._device_id = xpath_text(result, 'deviceId', fatal=True)
+ email, password = self._get_login_info()
+ if email and password:
+ signature = self._signature(email, password)
+ data = '<login><token>{0}</token><device><deviceId>{1}</deviceId><type>web</type></device></login>'.format(
+ escape(signature), escape(self._device_id)).encode()
+ url = self._API_BASE_URL + 'device/login'
+ result = self._download_xml(
+ url, None, data=data,
+ headers={'content-type': 'application/xml'})
+ self._device_token = xpath_text(result, 'token', fatal=True)
+ else:
+ self._device_token = xpath_text(result, 'deviceToken', fatal=True)
+ self._downloader.cache.store(
+ 'cbcwatch', self._cache_device_key(), {
+ 'id': self._device_id,
+ 'token': self._device_token,
+ })
+
+ def _parse_rss_feed(self, rss):
+ channel = xpath_element(rss, 'channel', fatal=True)
+
+ def _add_ns(path):
+ return xpath_with_ns(path, self._NS_MAP)
+
+ entries = []
+ for item in channel.findall('item'):
+ guid = xpath_text(item, 'guid', fatal=True)
+ title = xpath_text(item, 'title', fatal=True)
+
+ media_group = xpath_element(item, _add_ns('media:group'), fatal=True)
+ content = xpath_element(media_group, _add_ns('media:content'), fatal=True)
+ content_url = content.attrib['url']
+
+ thumbnails = []
+ for thumbnail in media_group.findall(_add_ns('media:thumbnail')):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'id': thumbnail.get('profile'),
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ timestamp = None
+ release_date = find_xpath_attr(
+ item, _add_ns('media:credit'), 'role', 'releaseDate')
+ if release_date is not None:
+ timestamp = parse_iso8601(release_date.text)
+
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': content_url,
+ 'id': guid,
+ 'title': title,
+ 'description': xpath_text(item, 'description'),
+ 'timestamp': timestamp,
+ 'duration': int_or_none(content.get('duration')),
+ 'age_limit': parse_age_limit(xpath_text(item, _add_ns('media:rating'))),
+ 'episode': xpath_text(item, _add_ns('clearleap:episode')),
+ 'episode_number': int_or_none(xpath_text(item, _add_ns('clearleap:episodeInSeason'))),
+ 'series': xpath_text(item, _add_ns('clearleap:series')),
+ 'season_number': int_or_none(xpath_text(item, _add_ns('clearleap:season'))),
+ 'thumbnails': thumbnails,
+ 'ie_key': 'CBCWatchVideo',
+ })
+
+ return self.playlist_result(
+ entries, xpath_text(channel, 'guid'),
+ xpath_text(channel, 'title'),
+ xpath_text(channel, 'description'))
+
+
+class CBCWatchVideoIE(CBCWatchBaseIE):
+ IE_NAME = 'cbc.ca:watch:video'
+ _VALID_URL = r'https?://api-cbc\.cloud\.clearleap\.com/cloffice/client/web/play/?\?.*?\bcontentId=(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TEST = {
+ # geo-restricted to Canada, bypassable
+ 'url': 'https://api-cbc.cloud.clearleap.com/cloffice/client/web/play/?contentId=3c84472a-1eea-4dee-9267-2655d5055dcf&categoryId=ebc258f5-ee40-4cca-b66b-ba6bd55b7235',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ result = self._call_api(url, video_id)
+
+ m3u8_url = xpath_text(result, 'url', fatal=True)
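+        # Rewriting .../<name>/<variant>.m3u8 to .../<name>/<name>.m3u8
+        # appears to yield the multi-bitrate master playlist; fall back to
+        # the original URL if fewer than two formats are found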
+        formats = self._extract_m3u8_formats(
+            re.sub(r'/([^/]+)/[^/?]+\.m3u8', r'/\1/\1.m3u8', m3u8_url),
+            video_id, 'mp4', fatal=False)
+ if len(formats) < 2:
+ formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4')
+ for f in formats:
+ format_id = f.get('format_id')
+ if format_id.startswith('AAC'):
+ f['acodec'] = 'aac'
+ elif format_id.startswith('AC3'):
+ f['acodec'] = 'ac-3'
+ self._sort_formats(formats)
+
+ info = {
+ 'id': video_id,
+ 'title': video_id,
+ 'formats': formats,
+ }
+
+ rss = xpath_element(result, 'rss')
+ if rss:
+ info.update(self._parse_rss_feed(rss)['entries'][0])
+ del info['url']
+ del info['_type']
+ del info['ie_key']
+ return info
+
+
+class CBCWatchIE(CBCWatchBaseIE):
+ IE_NAME = 'cbc.ca:watch'
+ _VALID_URL = r'https?://(?:gem|watch)\.cbc\.ca/(?:[^/]+/)+(?P<id>[0-9a-f-]+)'
+ _TESTS = [{
+ # geo-restricted to Canada, bypassable
+ 'url': 'http://watch.cbc.ca/doc-zone/season-6/customer-disservice/38e815a-009e3ab12e4',
+ 'info_dict': {
+ 'id': '9673749a-5e77-484c-8b62-a1092a6b5168',
+ 'ext': 'mp4',
+ 'title': 'Customer (Dis)Service',
+ 'description': 'md5:8bdd6913a0fe03d4b2a17ebe169c7c87',
+ 'upload_date': '20160219',
+ 'timestamp': 1455840000,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ }, {
+ # geo-restricted to Canada, bypassable
+ 'url': 'http://watch.cbc.ca/arthur/all/1ed4b385-cd84-49cf-95f0-80f004680057',
+ 'info_dict': {
+ 'id': '1ed4b385-cd84-49cf-95f0-80f004680057',
+ 'title': 'Arthur',
+ 'description': 'Arthur, the sweetest 8-year-old aardvark, and his pals solve all kinds of problems with humour, kindness and teamwork.',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ 'url': 'https://gem.cbc.ca/media/this-hour-has-22-minutes/season-26/episode-20/38e815a-0108c6c6a42',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ rss = self._call_api('web/browse/' + video_id, video_id)
+ return self._parse_rss_feed(rss)
+
+
+class CBCOlympicsIE(InfoExtractor):
+ IE_NAME = 'cbc.ca:olympics'
+ _VALID_URL = r'https?://olympics\.cbc\.ca/video/[^/]+/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_id = self._hidden_inputs(webpage)['videoId']
+ video_doc = self._download_xml(
+ 'https://olympics.cbc.ca/videodata/%s.xml' % video_id, video_id)
+ title = xpath_text(video_doc, 'title', fatal=True)
+ is_live = xpath_text(video_doc, 'kind') == 'Live'
+ if is_live:
+ title = self._live_title(title)
+
+ formats = []
+ for video_source in video_doc.findall('videoSources/videoSource'):
+ uri = xpath_text(video_source, 'uri')
+ if not uri:
+ continue
+ tokenize = self._download_json(
+ 'https://olympics.cbc.ca/api/api-akamai/tokenize',
+ video_id, data=json.dumps({
+ 'VideoSource': uri,
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Referer': url,
+ # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
+ 'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie
+ }, fatal=False)
+ if not tokenize:
+ continue
+ content_url = tokenize['ContentUrl']
+ video_source_format = video_source.get('format')
+ if video_source_format == 'IIS':
+ formats.extend(self._extract_ism_formats(
+ content_url, video_id, ism_id=video_source_format, fatal=False))
+ else:
+ formats.extend(self._extract_m3u8_formats(
+ content_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id=video_source_format, fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': xpath_text(video_doc, 'description'),
+ 'thumbnail': xpath_text(video_doc, 'thumbnailUrl'),
+ 'duration': parse_duration(xpath_text(video_doc, 'duration')),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
diff --git a/youtube_dl/extractor/cbs.py b/youtube_dlc/extractor/cbs.py
index 4a19a73d2..4a19a73d2 100644
--- a/youtube_dl/extractor/cbs.py
+++ b/youtube_dlc/extractor/cbs.py
diff --git a/youtube_dl/extractor/cbsinteractive.py b/youtube_dlc/extractor/cbsinteractive.py
index 6596e98a6..6596e98a6 100644
--- a/youtube_dl/extractor/cbsinteractive.py
+++ b/youtube_dlc/extractor/cbsinteractive.py
diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dlc/extractor/cbslocal.py
index 90852a9ef..90852a9ef 100644
--- a/youtube_dl/extractor/cbslocal.py
+++ b/youtube_dlc/extractor/cbslocal.py
diff --git a/youtube_dl/extractor/cbsnews.py b/youtube_dlc/extractor/cbsnews.py
index 345debcf0..345debcf0 100644
--- a/youtube_dl/extractor/cbsnews.py
+++ b/youtube_dlc/extractor/cbsnews.py
diff --git a/youtube_dl/extractor/cbssports.py b/youtube_dlc/extractor/cbssports.py
index 83b764762..83b764762 100644
--- a/youtube_dl/extractor/cbssports.py
+++ b/youtube_dlc/extractor/cbssports.py
diff --git a/youtube_dl/extractor/ccc.py b/youtube_dlc/extractor/ccc.py
index 36e6dff72..36e6dff72 100644
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dlc/extractor/ccc.py
diff --git a/youtube_dl/extractor/ccma.py b/youtube_dlc/extractor/ccma.py
index 544647f92..544647f92 100644
--- a/youtube_dl/extractor/ccma.py
+++ b/youtube_dlc/extractor/ccma.py
diff --git a/youtube_dl/extractor/cctv.py b/youtube_dlc/extractor/cctv.py
index c76f361c6..c76f361c6 100644
--- a/youtube_dl/extractor/cctv.py
+++ b/youtube_dlc/extractor/cctv.py
diff --git a/youtube_dl/extractor/cda.py b/youtube_dlc/extractor/cda.py
index 0c3af23d5..0c3af23d5 100644
--- a/youtube_dl/extractor/cda.py
+++ b/youtube_dlc/extractor/cda.py
diff --git a/youtube_dlc/extractor/ceskatelevize.py b/youtube_dlc/extractor/ceskatelevize.py
new file mode 100644
index 000000000..7cb4efb74
--- /dev/null
+++ b/youtube_dlc/extractor/ceskatelevize.py
@@ -0,0 +1,289 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ sanitized_Request,
+ unescapeHTML,
+ update_url_query,
+ urlencode_postdata,
+ USER_AGENTS,
+)
+
+
+class CeskaTelevizeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
+ _TESTS = [{
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220',
+ 'info_dict': {
+ 'id': '61924494877246241',
+ 'ext': 'mp4',
+ 'title': 'Hyde Park Civilizace: Život v Grónsku',
+ 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 3350,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en',
+ 'info_dict': {
+ 'id': '61924494877028507',
+ 'ext': 'mp4',
+ 'title': 'Hyde Park Civilizace: Bonus 01 - En',
+ 'description': 'English Subtittles',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 81.3,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # live stream
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/',
+ 'info_dict': {
+ 'id': 402,
+ 'ext': 'mp4',
+ 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Georestricted to Czech Republic',
+ }, {
+ 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.'
+ if '%s</p>' % NOT_AVAILABLE_STRING in webpage:
+ raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+ type_ = None
+ episode_id = None
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'getPlaylistUrl\(\[({.+?})\]', webpage, 'playlist',
+ default='{}'), playlist_id)
+ if playlist:
+ type_ = playlist.get('type')
+ episode_id = playlist.get('id')
+
+ if not type_:
+ type_ = self._html_search_regex(
+ r'getPlaylistUrl\(\[\{"type":"(.+?)","id":".+?"\}\],',
+ webpage, 'type')
+ if not episode_id:
+ episode_id = self._html_search_regex(
+ r'getPlaylistUrl\(\[\{"type":".+?","id":"(.+?)"\}\],',
+ webpage, 'episode_id')
+
+ data = {
+ 'playlist[0][type]': type_,
+ 'playlist[0][id]': episode_id,
+ 'requestUrl': compat_urllib_parse_urlparse(url).path,
+ 'requestSource': 'iVysilani',
+ }
+
+ entries = []
+
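+        # Fetch the playlist with the default and the Safari User-Agent; the
+        # second pass only merges extra formats into the existing entries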
+ for user_agent in (None, USER_AGENTS['Safari']):
+ req = sanitized_Request(
+ 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist',
+ data=urlencode_postdata(data))
+
+ req.add_header('Content-type', 'application/x-www-form-urlencoded')
+ req.add_header('x-addr', '127.0.0.1')
+ req.add_header('X-Requested-With', 'XMLHttpRequest')
+ if user_agent:
+ req.add_header('User-Agent', user_agent)
+ req.add_header('Referer', url)
+
+ playlistpage = self._download_json(req, playlist_id, fatal=False)
+
+ if not playlistpage:
+ continue
+
+ playlist_url = playlistpage['url']
+ if playlist_url == 'error_region':
+ raise ExtractorError(NOT_AVAILABLE_STRING, expected=True)
+
+ req = sanitized_Request(compat_urllib_parse_unquote(playlist_url))
+ req.add_header('Referer', url)
+
+ playlist_title = self._og_search_title(webpage, default=None)
+ playlist_description = self._og_search_description(webpage, default=None)
+
+ playlist = self._download_json(req, playlist_id, fatal=False)
+ if not playlist:
+ continue
+
+ playlist = playlist.get('playlist')
+ if not isinstance(playlist, list):
+ continue
+
+ playlist_len = len(playlist)
+
+ for num, item in enumerate(playlist):
+ is_live = item.get('type') == 'LIVE'
+ formats = []
+ for format_id, stream_url in item.get('streamUrls', {}).items():
+ if 'drmOnly=true' in stream_url:
+ continue
+ if 'playerType=flash' in stream_url:
+ stream_formats = self._extract_m3u8_formats(
+ stream_url, playlist_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls-%s' % format_id, fatal=False)
+ else:
+ stream_formats = self._extract_mpd_formats(
+ stream_url, playlist_id,
+ mpd_id='dash-%s' % format_id, fatal=False)
+ # See https://github.com/ytdl-org/youtube-dl/issues/12119#issuecomment-280037031
+ if format_id == 'audioDescription':
+ for f in stream_formats:
+ f['source_preference'] = -10
+ formats.extend(stream_formats)
+
+ if user_agent and len(entries) == playlist_len:
+ entries[num]['formats'].extend(formats)
+ continue
+
+ item_id = item.get('id') or item['assetId']
+ title = item['title']
+
+ duration = float_or_none(item.get('duration'))
+ thumbnail = item.get('previewImageUrl')
+
+ subtitles = {}
+ if item.get('type') == 'VOD':
+ subs = item.get('subtitles')
+ if subs:
+ subtitles = self.extract_subtitles(episode_id, subs)
+
+ if playlist_len == 1:
+ final_title = playlist_title or title
+ if is_live:
+ final_title = self._live_title(final_title)
+ else:
+ final_title = '%s (%s)' % (playlist_title, title)
+
+ entries.append({
+ 'id': item_id,
+ 'title': final_title,
+ 'description': playlist_description if playlist_len == 1 else None,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ })
+
+ for e in entries:
+ self._sort_formats(e['formats'])
+
+ return self.playlist_result(entries, playlist_id, playlist_title, playlist_description)
+
+ def _get_subtitles(self, episode_id, subs):
+ original_subtitles = self._download_webpage(
+ subs[0]['url'], episode_id, 'Downloading subtitles')
+ srt_subs = self._fix_subtitles(original_subtitles)
+ return {
+ 'cs': [{
+ 'ext': 'srt',
+ 'data': srt_subs,
+ }]
+ }
+
+ @staticmethod
+ def _fix_subtitles(subtitles):
+ """ Convert millisecond-based subtitles to SRT """
+
+ def _msectotimecode(msec):
+ """ Helper utility to convert milliseconds to timecode """
+ components = []
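+            # Successively split msec into milliseconds, seconds, minutes and
+            # hours, formatted below as HH:MM:SS,mmm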
+ for divider in [1000, 60, 60, 100]:
+ components.append(msec % divider)
+ msec //= divider
+ return '{3:02}:{2:02}:{1:02},{0:03}'.format(*components)
+
+ def _fix_subtitle(subtitle):
+ for line in subtitle.splitlines():
+ m = re.match(r'^\s*([0-9]+);\s*([0-9]+)\s+([0-9]+)\s*$', line)
+ if m:
+ yield m.group(1)
+ start, stop = (_msectotimecode(int(t)) for t in m.groups()[1:])
+ yield '{0} --> {1}'.format(start, stop)
+ else:
+ yield line
+
+ return '\r\n'.join(_fix_subtitle(subtitles))
+
+
+class CeskaTelevizePoradyIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)'
+ _TESTS = [{
+ # video with 18+ caution trailer
+ 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/',
+ 'info_dict': {
+ 'id': '215562210900007-bogotart',
+ 'title': 'Queer: Bogotart',
+ 'description': 'Alternativní průvodce současným queer světem',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '61924494876844842',
+ 'ext': 'mp4',
+ 'title': 'Queer: Bogotart (Varování 18+)',
+ 'duration': 10.2,
+ },
+ }, {
+ 'info_dict': {
+ 'id': '61924494877068022',
+ 'ext': 'mp4',
+ 'title': 'Queer: Bogotart (Queer)',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 1558.3,
+ },
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # iframe embed
+ 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data_url = update_url_query(unescapeHTML(self._search_regex(
+ (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'),
+ webpage, 'iframe player url', group='url')), query={
+ 'autoStart': 'true',
+ })
+
+ return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key())
diff --git a/youtube_dlc/extractor/channel9.py b/youtube_dlc/extractor/channel9.py
new file mode 100644
index 000000000..09cacf6d3
--- /dev/null
+++ b/youtube_dlc/extractor/channel9.py
@@ -0,0 +1,262 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ qualities,
+ unescapeHTML,
+)
+
+
+class Channel9IE(InfoExtractor):
+ IE_DESC = 'Channel 9'
+ IE_NAME = 'channel9'
+ _VALID_URL = r'https?://(?:www\.)?(?:channel9\.msdn\.com|s\.ch9\.ms)/(?P<contentpath>.+?)(?P<rss>/RSS)?/?(?:[?#&]|$)'
+
+ _TESTS = [{
+ 'url': 'http://channel9.msdn.com/Events/TechEd/Australia/2013/KOS002',
+ 'md5': '32083d4eaf1946db6d454313f44510ca',
+ 'info_dict': {
+ 'id': '6c413323-383a-49dc-88f9-a22800cab024',
+ 'ext': 'wmv',
+ 'title': 'Developer Kick-Off Session: Stuff We Love',
+ 'description': 'md5:b80bf9355a503c193aff7ec6cd5a7731',
+ 'duration': 4576,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'timestamp': 1377717420,
+ 'upload_date': '20130828',
+ 'session_code': 'KOS002',
+ 'session_room': 'Arena 1A',
+ 'session_speakers': 'count:5',
+ },
+ }, {
+ 'url': 'http://channel9.msdn.com/posts/Self-service-BI-with-Power-BI-nuclear-testing',
+ 'md5': 'dcf983ee6acd2088e7188c3cf79b46bc',
+ 'info_dict': {
+ 'id': 'fe8e435f-bb93-4e01-8e97-a28c01887024',
+ 'ext': 'wmv',
+ 'title': 'Self-service BI with Power BI - nuclear testing',
+ 'description': 'md5:2d17fec927fc91e9e17783b3ecc88f54',
+ 'duration': 1540,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'timestamp': 1386381991,
+ 'upload_date': '20131207',
+ 'authors': ['Mike Wilmot'],
+ },
+ }, {
+ # low quality mp4 is best
+ 'url': 'https://channel9.msdn.com/Events/CPP/CppCon-2015/Ranges-for-the-Standard-Library',
+ 'info_dict': {
+ 'id': '33ad69d2-6a4e-4172-83a1-a523013dec76',
+ 'ext': 'mp4',
+ 'title': 'Ranges for the Standard Library',
+ 'description': 'md5:9895e0a9fd80822d2f01c454b8f4a372',
+ 'duration': 5646,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'upload_date': '20150930',
+ 'timestamp': 1443640735,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://channel9.msdn.com/Events/DEVintersection/DEVintersection-2016/RSS',
+ 'info_dict': {
+ 'id': 'Events/DEVintersection/DEVintersection-2016',
+ 'title': 'DEVintersection 2016 Orlando Sessions',
+ },
+ 'playlist_mincount': 14,
+ }, {
+ 'url': 'https://channel9.msdn.com/Niners/Splendid22/Queue/76acff796e8f411184b008028e0d492b/RSS',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://channel9.msdn.com/Events/Speakers/scott-hanselman/RSS?UrlSafeName=scott-hanselman',
+ 'only_matching': True,
+ }]
+
+ _RSS_URL = 'http://channel9.msdn.com/%s/RSS'
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\'](https?://channel9\.msdn\.com/(?:[^/]+/)+)player\b',
+ webpage)
+
+ def _extract_list(self, video_id, rss_url=None):
+ if not rss_url:
+ rss_url = self._RSS_URL % video_id
+ rss = self._download_xml(rss_url, video_id, 'Downloading RSS')
+ entries = [self.url_result(session_url.text, 'Channel9')
+ for session_url in rss.findall('./channel/item/link')]
+ title_text = rss.find('./channel/title').text
+ return self.playlist_result(entries, video_id, title_text)
+
+ def _real_extract(self, url):
+ content_path, rss = re.match(self._VALID_URL, url).groups()
+
+ if rss:
+ return self._extract_list(content_path, url)
+
+ webpage = self._download_webpage(
+ url, content_path, 'Downloading web page')
+
+ episode_data = self._search_regex(
+ r"data-episode='([^']+)'", webpage, 'episode data', default=None)
+ if episode_data:
+ episode_data = self._parse_json(unescapeHTML(
+ episode_data), content_path)
+ content_id = episode_data['contentId']
+ is_session = '/Sessions(' in episode_data['api']
+ content_url = 'https://channel9.msdn.com/odata' + episode_data['api'] + '?$select=Captions,CommentCount,MediaLengthInSeconds,PublishedDate,Rating,RatingCount,Title,VideoMP4High,VideoMP4Low,VideoMP4Medium,VideoPlayerPreviewImage,VideoWMV,VideoWMVHQ,Views,'
+ if is_session:
+ content_url += 'Code,Description,Room,Slides,Speakers,ZipFile&$expand=Speakers'
+ else:
+ content_url += 'Authors,Body&$expand=Authors'
+ content_data = self._download_json(content_url, content_id)
+ title = content_data['Title']
+
+ QUALITIES = (
+ 'mp3',
+ 'wmv', 'mp4',
+ 'wmv-low', 'mp4-low',
+ 'wmv-mid', 'mp4-mid',
+ 'wmv-high', 'mp4-high',
+ )
+
+ quality_key = qualities(QUALITIES)
+
+ def quality(quality_id, format_url):
+ return (len(QUALITIES) if '_Source.' in format_url
+ else quality_key(quality_id))
+
+ formats = []
+ urls = set()
+
+ SITE_QUALITIES = {
+ 'MP3': 'mp3',
+ 'MP4': 'mp4',
+ 'Low Quality WMV': 'wmv-low',
+ 'Low Quality MP4': 'mp4-low',
+ 'Mid Quality WMV': 'wmv-mid',
+ 'Mid Quality MP4': 'mp4-mid',
+ 'High Quality WMV': 'wmv-high',
+ 'High Quality MP4': 'mp4-high',
+ }
+
+ formats_select = self._search_regex(
+ r'(?s)<select[^>]+name=["\']format[^>]+>(.+?)</select', webpage,
+ 'formats select', default=None)
+ if formats_select:
+ for mobj in re.finditer(
+ r'<option\b[^>]+\bvalue=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>\s*(?P<format>[^<]+?)\s*<',
+ formats_select):
+ format_url = mobj.group('url')
+ if format_url in urls:
+ continue
+ urls.add(format_url)
+ format_id = mobj.group('format')
+ quality_id = SITE_QUALITIES.get(format_id, format_id)
+ formats.append({
+ 'url': format_url,
+ 'format_id': quality_id,
+ 'quality': quality(quality_id, format_url),
+ 'vcodec': 'none' if quality_id == 'mp3' else None,
+ })
+
+ API_QUALITIES = {
+ 'VideoMP4Low': 'mp4-low',
+ 'VideoWMV': 'wmv-mid',
+ 'VideoMP4Medium': 'mp4-mid',
+ 'VideoMP4High': 'mp4-high',
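+            # 'wmv-hq' is absent from QUALITIES above, so these URLs rank
+            # below all listed qualities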
+ 'VideoWMVHQ': 'wmv-hq',
+ }
+
+ for format_id, q in API_QUALITIES.items():
+ q_url = content_data.get(format_id)
+ if not q_url or q_url in urls:
+ continue
+ urls.add(q_url)
+ formats.append({
+ 'url': q_url,
+ 'format_id': q,
+ 'quality': quality(q, q_url),
+ })
+
+ self._sort_formats(formats)
+
+ slides = content_data.get('Slides')
+ zip_file = content_data.get('ZipFile')
+
+ if not formats and not slides and not zip_file:
+            raise ExtractorError(
+                'No recording, slides or zip file available for %s' % content_path)
+
+ subtitles = {}
+ for caption in content_data.get('Captions', []):
+ caption_url = caption.get('Url')
+ if not caption_url:
+ continue
+ subtitles.setdefault(caption.get('Language', 'en'), []).append({
+ 'url': caption_url,
+ 'ext': 'vtt',
+ })
+
+ common = {
+ 'id': content_id,
+ 'title': title,
+ 'description': clean_html(content_data.get('Description') or content_data.get('Body')),
+ 'thumbnail': content_data.get('VideoPlayerPreviewImage'),
+ 'duration': int_or_none(content_data.get('MediaLengthInSeconds')),
+ 'timestamp': parse_iso8601(content_data.get('PublishedDate')),
+ 'avg_rating': int_or_none(content_data.get('Rating')),
+ 'rating_count': int_or_none(content_data.get('RatingCount')),
+ 'view_count': int_or_none(content_data.get('Views')),
+ 'comment_count': int_or_none(content_data.get('CommentCount')),
+ 'subtitles': subtitles,
+ }
+ if is_session:
+ speakers = []
+ for s in content_data.get('Speakers', []):
+ speaker_name = s.get('FullName')
+ if not speaker_name:
+ continue
+ speakers.append(speaker_name)
+
+ common.update({
+ 'session_code': content_data.get('Code'),
+ 'session_room': content_data.get('Room'),
+ 'session_speakers': speakers,
+ })
+ else:
+ authors = []
+ for a in content_data.get('Authors', []):
+ author_name = a.get('DisplayName')
+ if not author_name:
+ continue
+ authors.append(author_name)
+ common['authors'] = authors
+
+ contents = []
+
+ if slides:
+ d = common.copy()
+ d.update({'title': title + '-Slides', 'url': slides})
+ contents.append(d)
+
+ if zip_file:
+ d = common.copy()
+ d.update({'title': title + '-Zip', 'url': zip_file})
+ contents.append(d)
+
+ if formats:
+ d = common.copy()
+ d.update({'title': title, 'formats': formats})
+ contents.append(d)
+ return self.playlist_result(contents)
+ else:
+ return self._extract_list(content_path)
diff --git a/youtube_dl/extractor/charlierose.py b/youtube_dlc/extractor/charlierose.py
index 42c9af263..42c9af263 100644
--- a/youtube_dl/extractor/charlierose.py
+++ b/youtube_dlc/extractor/charlierose.py
diff --git a/youtube_dlc/extractor/chaturbate.py b/youtube_dlc/extractor/chaturbate.py
new file mode 100644
index 000000000..a459dcb8d
--- /dev/null
+++ b/youtube_dlc/extractor/chaturbate.py
@@ -0,0 +1,109 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ lowercase_escape,
+ url_or_none,
+)
+
+
+class ChaturbateIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?chaturbate\.com/(?:fullvideo/?\?.*?\bb=)?(?P<id>[^/?&#]+)'
+ _TESTS = [{
+ 'url': 'https://www.chaturbate.com/siswet19/',
+ 'info_dict': {
+ 'id': 'siswet19',
+ 'ext': 'mp4',
+ 'title': 're:^siswet19 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'age_limit': 18,
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Room is offline',
+ }, {
+ 'url': 'https://chaturbate.com/fullvideo/?b=caylin',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://en.chaturbate.com/siswet19/',
+ 'only_matching': True,
+ }]
+
+ _ROOM_OFFLINE = 'Room is currently offline'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://chaturbate.com/%s/' % video_id, video_id,
+ headers=self.geo_verification_headers())
+
+ found_m3u8_urls = []
+
+ data = self._parse_json(
+ self._search_regex(
+ r'initialRoomDossier\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'data', default='{}', group='value'),
+ video_id, transform_source=lowercase_escape, fatal=False)
+ if data:
+ m3u8_url = url_or_none(data.get('hls_source'))
+ if m3u8_url:
+ found_m3u8_urls.append(m3u8_url)
+
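+ # Fall back to scanning the page for m3u8 URLs, first with
+ # JS-escaped quotes (\u0022 or \u0027), then with plain quotes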
+ if not found_m3u8_urls:
+ for m in re.finditer(
+ r'(\\u002[27])(?P<url>http.+?\.m3u8.*?)\1', webpage):
+ found_m3u8_urls.append(lowercase_escape(m.group('url')))
+
+ if not found_m3u8_urls:
+ for m in re.finditer(
+ r'(["\'])(?P<url>http.+?\.m3u8.*?)\1', webpage):
+ found_m3u8_urls.append(m.group('url'))
+
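+ # For each discovered URL also try the non-"fast" variant (with the
+ # "_fast" suffix stripped), deduplicating along the way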
+ m3u8_urls = []
+ for found_m3u8_url in found_m3u8_urls:
+ m3u8_fast_url, m3u8_no_fast_url = found_m3u8_url, found_m3u8_url.replace('_fast', '')
+ for m3u8_url in (m3u8_fast_url, m3u8_no_fast_url):
+ if m3u8_url not in m3u8_urls:
+ m3u8_urls.append(m3u8_url)
+
+ if not m3u8_urls:
+ error = self._search_regex(
+ [r'<span[^>]+class=(["\'])desc_span\1[^>]*>(?P<error>[^<]+)</span>',
+ r'<div[^>]+id=(["\'])defchat\1[^>]*>\s*<p><strong>(?P<error>[^<]+)<'],
+ webpage, 'error', group='error', default=None)
+ if not error:
+ if any(p in webpage for p in (
+ self._ROOM_OFFLINE, 'offline_tipping', 'tip_offline')):
+ error = self._ROOM_OFFLINE
+ if error:
+ raise ExtractorError(error, expected=True)
+ raise ExtractorError('Unable to find stream URL')
+
+ formats = []
+ for m3u8_url in m3u8_urls:
+ for known_id in ('fast', 'slow'):
+ if '_%s' % known_id in m3u8_url:
+ m3u8_id = known_id
+ break
+ else:
+ m3u8_id = None
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, ext='mp4',
+ # ffmpeg skips segments for fast m3u8
+ preference=-10 if m3u8_id == 'fast' else None,
+ m3u8_id=m3u8_id, fatal=False, live=True))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(video_id),
+ 'thumbnail': 'https://roomimg.stream.highwebmedia.com/ri/%s.jpg' % video_id,
+ 'age_limit': self._rta_search(webpage),
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/chilloutzone.py b/youtube_dlc/extractor/chilloutzone.py
index 5aac21299..5aac21299 100644
--- a/youtube_dl/extractor/chilloutzone.py
+++ b/youtube_dlc/extractor/chilloutzone.py
diff --git a/youtube_dl/extractor/chirbit.py b/youtube_dlc/extractor/chirbit.py
index 8d75cdf19..8d75cdf19 100644
--- a/youtube_dl/extractor/chirbit.py
+++ b/youtube_dlc/extractor/chirbit.py
diff --git a/youtube_dl/extractor/cinchcast.py b/youtube_dlc/extractor/cinchcast.py
index b861d54b0..b861d54b0 100644
--- a/youtube_dl/extractor/cinchcast.py
+++ b/youtube_dlc/extractor/cinchcast.py
diff --git a/youtube_dl/extractor/cinemax.py b/youtube_dlc/extractor/cinemax.py
index 7f89d33de..7f89d33de 100644
--- a/youtube_dl/extractor/cinemax.py
+++ b/youtube_dlc/extractor/cinemax.py
diff --git a/youtube_dl/extractor/ciscolive.py b/youtube_dlc/extractor/ciscolive.py
index da404e4dc..da404e4dc 100644
--- a/youtube_dl/extractor/ciscolive.py
+++ b/youtube_dlc/extractor/ciscolive.py
diff --git a/youtube_dl/extractor/cjsw.py b/youtube_dlc/extractor/cjsw.py
index 505bdbe16..505bdbe16 100644
--- a/youtube_dl/extractor/cjsw.py
+++ b/youtube_dlc/extractor/cjsw.py
diff --git a/youtube_dl/extractor/cliphunter.py b/youtube_dlc/extractor/cliphunter.py
index f2ca7a337..f2ca7a337 100644
--- a/youtube_dl/extractor/cliphunter.py
+++ b/youtube_dlc/extractor/cliphunter.py
diff --git a/youtube_dl/extractor/clippit.py b/youtube_dlc/extractor/clippit.py
index a1a7a774c..a1a7a774c 100644
--- a/youtube_dl/extractor/clippit.py
+++ b/youtube_dlc/extractor/clippit.py
diff --git a/youtube_dl/extractor/cliprs.py b/youtube_dlc/extractor/cliprs.py
index d55b26d59..d55b26d59 100644
--- a/youtube_dl/extractor/cliprs.py
+++ b/youtube_dlc/extractor/cliprs.py
diff --git a/youtube_dl/extractor/clipsyndicate.py b/youtube_dlc/extractor/clipsyndicate.py
index 6cdb42f5a..6cdb42f5a 100644
--- a/youtube_dl/extractor/clipsyndicate.py
+++ b/youtube_dlc/extractor/clipsyndicate.py
diff --git a/youtube_dl/extractor/closertotruth.py b/youtube_dlc/extractor/closertotruth.py
index 26243d52d..26243d52d 100644
--- a/youtube_dl/extractor/closertotruth.py
+++ b/youtube_dlc/extractor/closertotruth.py
diff --git a/youtube_dlc/extractor/cloudflarestream.py b/youtube_dlc/extractor/cloudflarestream.py
new file mode 100644
index 000000000..2fdcfbb3a
--- /dev/null
+++ b/youtube_dlc/extractor/cloudflarestream.py
@@ -0,0 +1,72 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import re
+
+from .common import InfoExtractor
+
+
+class CloudflareStreamIE(InfoExtractor):
+ _DOMAIN_RE = r'(?:cloudflarestream\.com|(?:videodelivery|bytehighway)\.net)'
+ _EMBED_RE = r'embed\.%s/embed/[^/]+\.js\?.*?\bvideo=' % _DOMAIN_RE
+ _ID_RE = r'[\da-f]{32}|[\w-]+\.[\w-]+\.[\w-]+'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:watch\.)?%s/|
+ %s
+ )
+ (?P<id>%s)
+ ''' % (_DOMAIN_RE, _EMBED_RE, _ID_RE)
+ _TESTS = [{
+ 'url': 'https://embed.cloudflarestream.com/embed/we4g.fla9.latest.js?video=31c9291ab41fac05471db4e73aa11717',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://watch.cloudflarestream.com/9df17203414fd1db3e3ed74abbe936c1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cloudflarestream.com/31c9291ab41fac05471db4e73aa11717/manifest/video.mpd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://embed.videodelivery.net/embed/r4xu.fla9.latest.js?video=81d80727f3022488598f68d323c1ad5e',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s(?:%s).*?)\1' % (CloudflareStreamIE._EMBED_RE, CloudflareStreamIE._ID_RE),
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ domain = 'bytehighway.net' if 'bytehighway.net/' in url else 'videodelivery.net'
+ base_url = 'https://%s/%s/' % (domain, video_id)
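+ # A video ID containing dots appears to be a JWT; the actual video ID
+ # is the "sub" claim of its base64url-encoded payload segment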
+ if '.' in video_id:
+ video_id = self._parse_json(base64.urlsafe_b64decode(
+ # restore the padding stripped from base64url JWT segments
+ video_id.split('.')[1] + '==='), video_id)['sub']
+ manifest_base_url = base_url + 'manifest/video.'
+
+ formats = self._extract_m3u8_formats(
+ manifest_base_url + 'm3u8', video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ manifest_base_url + 'mpd', video_id, mpd_id='dash', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': video_id,
+ 'thumbnail': base_url + 'thumbnails/thumbnail.jpg',
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/cloudy.py b/youtube_dlc/extractor/cloudy.py
index 85ca20ecc..85ca20ecc 100644
--- a/youtube_dl/extractor/cloudy.py
+++ b/youtube_dlc/extractor/cloudy.py
diff --git a/youtube_dl/extractor/clubic.py b/youtube_dlc/extractor/clubic.py
index 98f9cb596..98f9cb596 100644
--- a/youtube_dl/extractor/clubic.py
+++ b/youtube_dlc/extractor/clubic.py
diff --git a/youtube_dl/extractor/clyp.py b/youtube_dlc/extractor/clyp.py
index 06d04de13..06d04de13 100644
--- a/youtube_dl/extractor/clyp.py
+++ b/youtube_dlc/extractor/clyp.py
diff --git a/youtube_dl/extractor/cmt.py b/youtube_dlc/extractor/cmt.py
index e701fbeab..e701fbeab 100644
--- a/youtube_dl/extractor/cmt.py
+++ b/youtube_dlc/extractor/cmt.py
diff --git a/youtube_dl/extractor/cnbc.py b/youtube_dlc/extractor/cnbc.py
index 6889b0f40..6889b0f40 100644
--- a/youtube_dl/extractor/cnbc.py
+++ b/youtube_dlc/extractor/cnbc.py
diff --git a/youtube_dl/extractor/cnn.py b/youtube_dlc/extractor/cnn.py
index 774b71055..774b71055 100644
--- a/youtube_dl/extractor/cnn.py
+++ b/youtube_dlc/extractor/cnn.py
diff --git a/youtube_dl/extractor/comedycentral.py b/youtube_dlc/extractor/comedycentral.py
index d08b909a6..d08b909a6 100644
--- a/youtube_dl/extractor/comedycentral.py
+++ b/youtube_dlc/extractor/comedycentral.py
diff --git a/youtube_dlc/extractor/common.py b/youtube_dlc/extractor/common.py
new file mode 100644
index 000000000..c1ea5d846
--- /dev/null
+++ b/youtube_dlc/extractor/common.py
@@ -0,0 +1,3013 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import datetime
+import hashlib
+import json
+import netrc
+import os
+import random
+import re
+import socket
+import sys
+import time
+import math
+
+from ..compat import (
+ compat_cookiejar_Cookie,
+ compat_cookies,
+ compat_etree_Element,
+ compat_etree_fromstring,
+ compat_getpass,
+ compat_integer_types,
+ compat_http_client,
+ compat_os_name,
+ compat_str,
+ compat_urllib_error,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlencode,
+ compat_urllib_request,
+ compat_urlparse,
+ compat_xml_parse_error,
+)
+from ..downloader.f4m import (
+ get_base_url,
+ remove_encrypted_media,
+)
+from ..utils import (
+ NO_DEFAULT,
+ age_restricted,
+ base_url,
+ bug_reports_message,
+ clean_html,
+ compiled_regex_type,
+ determine_ext,
+ determine_protocol,
+ dict_get,
+ error_to_compat_str,
+ ExtractorError,
+ extract_attributes,
+ fix_xml_ampersands,
+ float_or_none,
+ GeoRestrictedError,
+ GeoUtils,
+ int_or_none,
+ js_to_json,
+ JSON_LD_RE,
+ mimetype2ext,
+ orderedSet,
+ parse_bitrate,
+ parse_codecs,
+ parse_duration,
+ parse_iso8601,
+ parse_m3u8_attributes,
+ parse_resolution,
+ RegexNotFoundError,
+ sanitized_Request,
+ sanitize_filename,
+ str_or_none,
+ strip_or_none,
+ unescapeHTML,
+ unified_strdate,
+ unified_timestamp,
+ update_Request,
+ update_url_query,
+ urljoin,
+ url_basename,
+ url_or_none,
+ xpath_element,
+ xpath_text,
+ xpath_with_ns,
+)
+
+
+class InfoExtractor(object):
+ """Information Extractor class.
+
+ Information extractors are the classes that, given a URL, extract
+ information about the video (or videos) the URL refers to. This
+ information includes the real video URL, the video title, author and
+ others. The information is stored in a dictionary which is then
+ passed to the YoutubeDL. The YoutubeDL processes this
+ information, possibly downloading the video to the file system, among
+ other possible outcomes.
+
+ The type field determines the type of the result.
+ By far the most common value (and the default if _type is missing) is
+ "video", which indicates a single video.
+
+ For a video, the dictionaries must include the following fields:
+
+ id: Video identifier.
+ title: Video title, unescaped.
+
+ Additionally, it must contain either a formats entry or a url one:
+
+ formats: A list of dictionaries for each format available, ordered
+ from worst to best quality.
+
+ Potential fields:
+ * url The mandatory URL representing the media:
+ for plain file media - HTTP URL of this file,
+ for RTMP - RTMP URL,
+ for HLS - URL of the M3U8 media playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH
+ - HTTP URL to plain file media (in case of
+ unfragmented media)
+ - URL of the MPD manifest or base URL
+ representing the media if MPD manifest
+ is parsed from a string (in case of
+ fragmented media)
+ for MSS - URL of the ISM manifest.
+ * manifest_url
+ The URL of the manifest file in case of
+ fragmented media:
+ for HLS - URL of the M3U8 master playlist,
+ for HDS - URL of the F4M manifest,
+ for DASH - URL of the MPD manifest,
+ for MSS - URL of the ISM manifest.
+ * ext Will be calculated from URL if missing
+ * format A human-readable description of the format
+ ("mp4 container with h264/opus").
+ Calculated from the format_id, width, height,
+ and format_note fields if missing.
+ * format_id A short description of the format
+ ("mp4_h264_opus" or "19").
+ Technically optional, but strongly recommended.
+ * format_note Additional info about the format
+ ("3D" or "DASH video")
+ * width Width of the video, if known
+ * height Height of the video, if known
+ * resolution Textual description of width and height
+ * tbr Average bitrate of audio and video in KBit/s
+ * abr Average audio bitrate in KBit/s
+ * acodec Name of the audio codec in use
+ * asr Audio sampling rate in Hertz
+ * vbr Average video bitrate in KBit/s
+ * fps Frame rate
+ * vcodec Name of the video codec in use
+ * container Name of the container format
+ * filesize The number of bytes, if known in advance
+ * filesize_approx An estimate for the number of bytes
+ * player_url SWF Player URL (used for rtmpdump).
+ * protocol The protocol that will be used for the actual
+ download, lower-case.
+ "http", "https", "rtsp", "rtmp", "rtmpe",
+ "m3u8", "m3u8_native" or "http_dash_segments".
+ * fragment_base_url
+ Base URL for fragments. Each fragment's path
+ value (if present) will be relative to
+ this URL.
+ * fragments A list of fragments of a fragmented media.
+ Each fragment entry must contain either a URL
+ or a path. If a URL is present it should be
+ used by the client. Otherwise both path and
+ fragment_base_url must be present. Here is
+ the list of all potential fields:
+ * "url" - fragment's URL
+ * "path" - fragment's path relative to
+ fragment_base_url
+ * "duration" (optional, int or float)
+ * "filesize" (optional, int)
+ * preference Order number of this format. If this field is
+ present and not None, the formats get sorted
+ by this field, regardless of all other values.
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ < -1000 to hide the format (if there is
+ another one which is strictly better)
+ * language Language code, e.g. "de" or "en-US".
+ * language_preference Is this in the language mentioned in
+ the URL?
+ 10 if it's what the URL is about,
+ -1 for default (don't know),
+ -10 otherwise, other values reserved for now.
+ * quality Order number of the video quality of this
+ format, irrespective of the file format.
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ * source_preference Order number for this video source
+ (quality takes higher priority)
+ -1 for default (order by other properties),
+ -2 or smaller for less than default.
+ * http_headers A dictionary of additional HTTP headers
+ to add to the request.
+ * stretched_ratio If given and not 1, indicates that the
+ video's pixels are not square.
+ width : height ratio as float.
+ * no_resume The server does not support resuming the
+ (HTTP or RTMP) download. Boolean.
+ * downloader_options A dictionary of downloader options as
+ described in FileDownloader
+
+ url: Final video URL.
+ ext: Video filename extension.
+ format: The video format, defaults to ext (used for --get-format)
+ player_url: SWF Player URL (used for rtmpdump).
+
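+ As an illustration (all values made up), a minimal video result
+ could look like:
+
+ {
+ 'id': '4234987',
+ 'title': 'Dancing naked mole rats',
+ 'formats': [{
+ 'url': 'https://cdn.example.com/video-360p.mp4',
+ 'format_id': 'mp4-360',
+ 'height': 360,
+ }],
+ }
+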
+ The following fields are optional:
+
+ alt_title: A secondary title of the video.
+ display_id: An alternative identifier for the video, not necessarily
+ unique, but available before title. Typically, id is
+ something like "4234987", title "Dancing naked mole rats",
+ and display_id "dancing-naked-mole-rats"
+ thumbnails: A list of dictionaries, with the following entries:
+ * "id" (optional, string) - Thumbnail format ID
+ * "url"
+ * "preference" (optional, int) - quality of the image
+ * "width" (optional, int)
+ * "height" (optional, int)
+ * "resolution" (optional, string "{width}x{height}",
+ deprecated)
+ * "filesize" (optional, int)
+ thumbnail: Full URL to a video thumbnail image.
+ description: Full video description.
+ uploader: Full name of the video uploader.
+ license: License name the video is licensed under.
+ creator: The creator of the video.
+ release_date: The date (YYYYMMDD) when the video was released.
+ timestamp: UNIX timestamp of the moment the video became available.
+ upload_date: Video upload date (YYYYMMDD).
+ If not explicitly set, calculated from timestamp.
+ uploader_id: Nickname or id of the video uploader.
+ uploader_url: Full URL to a personal webpage of the video uploader.
+ channel: Full name of the channel the video is uploaded on.
+ Note that channel fields may or may not repeat uploader
+ fields. This depends on a particular extractor.
+ channel_id: Id of the channel.
+ channel_url: Full URL to a channel webpage.
+ location: Physical location where the video was filmed.
+ subtitles: The available subtitles as a dictionary in the format
+ {tag: subformats}. "tag" is usually a language code, and
+ "subformats" is a list sorted from lower to higher
+ preference, each element is a dictionary with the "ext"
+ entry and one of:
+ * "data": The subtitles file contents
+ * "url": A URL pointing to the subtitles file
+ "ext" will be calculated from URL if missing
+ automatic_captions: Like 'subtitles', used by the YoutubeIE for
+ automatically generated captions
+ duration: Length of the video in seconds, as an integer or float.
+ view_count: How many users have watched the video on the platform.
+ like_count: Number of positive ratings of the video
+ dislike_count: Number of negative ratings of the video
+ repost_count: Number of reposts of the video
+ average_rating: Average rating given by users; the scale used depends on the webpage
+ comment_count: Number of comments on the video
+ comments: A list of comments, each with one or more of the following
+ properties (at least one of "text" or "html" is required):
+ * "author" - human-readable name of the comment author
+ * "author_id" - user ID of the comment author
+ * "id" - Comment ID
+ * "html" - Comment as HTML
+ * "text" - Plain text of the comment
+ * "timestamp" - UNIX timestamp of comment
+ * "parent" - ID of the comment this one is replying to.
+ Set to "root" to indicate that this is a
+ comment to the original video.
+ age_limit: Age restriction for the video, as an integer (years)
+ webpage_url: The URL to the video webpage; if given to youtube-dlc it
+ should yield the same result again. (It will be set
+ by YoutubeDL if it's missing)
+ categories: A list of categories that the video falls in, for example
+ ["Sports", "Berlin"]
+ tags: A list of tags assigned to the video, e.g. ["sweden", "pop music"]
+ is_live: True, False, or None (=unknown). Whether this video is a
+ live stream rather than a fixed-length video.
+ start_time: Time in seconds where the reproduction should start, as
+ specified in the URL.
+ end_time: Time in seconds where the reproduction should end, as
+ specified in the URL.
+ chapters: A list of dictionaries, with the following entries:
+ * "start_time" - The start time of the chapter in seconds
+ * "end_time" - The end time of the chapter in seconds
+ * "title" (optional, string)
+
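+ For instance, a subtitles value with a single English VTT track
+ (made-up URL) would look like:
+
+ {
+ 'en': [{
+ 'ext': 'vtt',
+ 'url': 'https://cdn.example.com/subs-en.vtt',
+ }],
+ }
+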
+ The following fields should only be used when the video belongs to some logical
+ chapter or section:
+
+ chapter: Name or title of the chapter the video belongs to.
+ chapter_number: Number of the chapter the video belongs to, as an integer.
+ chapter_id: Id of the chapter the video belongs to, as a unicode string.
+
+ The following fields should only be used when the video is an episode of some
+ series, programme or podcast:
+
+ series: Title of the series or programme the video episode belongs to.
+ season: Title of the season the video episode belongs to.
+ season_number: Number of the season the video episode belongs to, as an integer.
+ season_id: Id of the season the video episode belongs to, as a unicode string.
+ episode: Title of the video episode. Unlike the mandatory video title field,
+ this field should denote the exact title of the video episode
+ without any kind of decoration.
+ episode_number: Number of the video episode within a season, as an integer.
+ episode_id: Id of the video episode, as a unicode string.
+
+ The following fields should only be used when the media is a track or a part of
+ a music album:
+
+ track: Title of the track.
+ track_number: Number of the track within an album or a disc, as an integer.
+ track_id: Id of the track (useful in case of custom indexing, e.g. 6.iii),
+ as a unicode string.
+ artist: Artist(s) of the track.
+ genre: Genre(s) of the track.
+ album: Title of the album the track belongs to.
+ album_type: Type of the album (e.g. "Demo", "Full-length", "Split", "Compilation", etc).
+ album_artist: List of all artists who appeared on the album (e.g.
+ "Ash Borer / Fell Voices" or "Various Artists", useful for splits
+ and compilations).
+ disc_number: Number of the disc or other physical medium the track belongs to,
+ as an integer.
+ release_year: Year (YYYY) when the album was released.
+
+ Unless mentioned otherwise, the fields should be Unicode strings.
+
+ Unless mentioned otherwise, None is equivalent to absence of information.
+
+
+ _type "playlist" indicates multiple videos.
+ There must be a key "entries", which is a list, an iterable, or a PagedList
+ object, each element of which is a valid dictionary by this specification.
+
+ Additionally, playlists can have "id", "title", "description", "uploader",
+ "uploader_id", "uploader_url" attributes with the same semantics as videos
+ (see above).
+
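+ A minimal playlist result (made-up values) could look like:
+
+ {
+ '_type': 'playlist',
+ 'id': 'album-4321',
+ 'title': 'Some album',
+ 'entries': [...], # a list of valid video dictionaries
+ }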
+
+ _type "multi_video" indicates that there are multiple videos that
+ form a single show, for example multiple acts of an opera or TV episode.
+ It must have an entries key like a playlist and contain all the keys
+ required for a video at the same time.
+
+
+ _type "url" indicates that the video must be extracted from another
+ location, possibly by a different extractor. Its only required key is:
+ "url" - the next URL to extract.
+ The key "ie_key" can be set to the class name (minus the trailing "IE",
+ e.g. "Youtube") if the extractor class is known in advance.
+ Additionally, the dictionary may have any properties of the resolved entity
+ known in advance, for example "title" if the title of the referred video is
+ known ahead of time.
+
+
+ _type "url_transparent" entities have the same specification as "url", but
+ indicate that the given additional information is more precise than the one
+ associated with the resolved URL.
+ This is useful when a site employs a video service that hosts the video and
+ its technical metadata, but that video service does not embed a useful
+ title, description etc.
+
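+ For example, a "url_transparent" result that delegates extraction to
+ another URL while overriding the title (made-up values) could be:
+
+ {
+ '_type': 'url_transparent',
+ 'url': 'https://videohost.example.com/embed/12345',
+ 'title': 'Title taken from the embedding page',
+ }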
+
+ Subclasses of this one should re-define the _real_initialize() and
+ _real_extract() methods and define a _VALID_URL regexp.
+ Probably, they should also be added to the list of extractors.
+
+ _GEO_BYPASS attribute may be set to False in order to disable
+ geo restriction bypass mechanisms for a particular extractor.
+ Though it won't disable explicit geo restriction bypass based on
+ country code provided with geo_bypass_country.
+
+ _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+ countries for this extractor. One of these countries will be used by
+ geo restriction bypass mechanism right away in order to bypass
+ geo restriction, of course, if the mechanism is not disabled.
+
+ _GEO_IP_BLOCKS attribute may contain a list of presumably geo unrestricted
+ IP blocks in CIDR notation for this extractor. One of these IP blocks
+ will be used by geo restriction bypass mechanism similarly
+ to _GEO_COUNTRIES.
+
+ Finally, the _WORKING attribute should be set to False for broken IEs
+ in order to warn the users and skip the tests.
+ """
+
+ _ready = False
+ _downloader = None
+ _x_forwarded_for_ip = None
+ _GEO_BYPASS = True
+ _GEO_COUNTRIES = None
+ _GEO_IP_BLOCKS = None
+ _WORKING = True
+
+ def __init__(self, downloader=None):
+ """Constructor. Receives an optional downloader."""
+ self._ready = False
+ self._x_forwarded_for_ip = None
+ self.set_downloader(downloader)
+
+ @classmethod
+ def suitable(cls, url):
+ """Receives a URL and returns True if suitable for this IE."""
+
+ # This does not use has/getattr intentionally - we want to know whether
+ # we have cached the regexp for *this* class, whereas getattr would also
+ # match the superclass
+ if '_VALID_URL_RE' not in cls.__dict__:
+ cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+ return cls._VALID_URL_RE.match(url) is not None
+
+ @classmethod
+ def _match_id(cls, url):
+ if '_VALID_URL_RE' not in cls.__dict__:
+ cls._VALID_URL_RE = re.compile(cls._VALID_URL)
+ m = cls._VALID_URL_RE.match(url)
+ assert m
+ return compat_str(m.group('id'))
+
+ @classmethod
+ def working(cls):
+ """Getter method for _WORKING."""
+ return cls._WORKING
+
+ def initialize(self):
+ """Initializes an instance (authentication, etc)."""
+ self._initialize_geo_bypass({
+ 'countries': self._GEO_COUNTRIES,
+ 'ip_blocks': self._GEO_IP_BLOCKS,
+ })
+ if not self._ready:
+ self._real_initialize()
+ self._ready = True
+
+ def _initialize_geo_bypass(self, geo_bypass_context):
+ """
+ Initialize geo restriction bypass mechanism.
+
+ This method is used to initialize the geo bypass mechanism, which is
+ based on faking the X-Forwarded-For HTTP header. A random country from
+ the provided country list is selected and a random IP belonging to
+ this country is generated. This IP will be passed as the
+ X-Forwarded-For HTTP header in all subsequent HTTP requests.
+
+ This method is used for the initial setup of the geo bypass mechanism
+ during instance initialization, based on _GEO_COUNTRIES and
+ _GEO_IP_BLOCKS.
+
+ You may also call it manually from an extractor's code if geo bypass
+ information is not available beforehand (e.g. it is only obtained
+ during extraction) or for some other reason. In this case you should
+ pass this information in the geo bypass context given as the first
+ argument. It may contain the following fields:
+
+ countries: List of geo unrestricted countries (similar
+ to _GEO_COUNTRIES)
+ ip_blocks: List of geo unrestricted IP blocks in CIDR notation
+ (similar to _GEO_IP_BLOCKS)
+
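+ For example, an extractor that only learns about the restriction
+ during extraction might call (made-up values):
+
+ self._initialize_geo_bypass({
+ 'countries': ['DE', 'FR'],
+ 'ip_blocks': ['192.0.2.0/24'],
+ })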
+ """
+ if not self._x_forwarded_for_ip:
+
+ # Geo bypass mechanism is explicitly disabled by user
+ if not self._downloader.params.get('geo_bypass', True):
+ return
+
+ if not geo_bypass_context:
+ geo_bypass_context = {}
+
+ # Backward compatibility: previously _initialize_geo_bypass
+ # expected a list of countries, some 3rd party code may still use
+ # it this way
+ if isinstance(geo_bypass_context, (list, tuple)):
+ geo_bypass_context = {
+ 'countries': geo_bypass_context,
+ }
+
+ # The whole point of the geo bypass mechanism is to fake the IP
+ # as the X-Forwarded-For HTTP header based on some IP block or
+ # country code.
+
+ # Path 1: bypassing based on IP block in CIDR notation
+
+ # Explicit IP block specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ ip_block = self._downloader.params.get('geo_bypass_ip_block', None)
+
+ # Otherwise use random IP block from geo bypass context but only
+ # if extractor is known as geo bypassable
+ if not ip_block:
+ ip_blocks = geo_bypass_context.get('ip_blocks')
+ if self._GEO_BYPASS and ip_blocks:
+ ip_block = random.choice(ip_blocks)
+
+ if ip_block:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(ip_block)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] Using fake IP %s as X-Forwarded-For.'
+ % self._x_forwarded_for_ip)
+ return
+
+ # Path 2: bypassing based on country code
+
+ # Explicit country code specified by user, use it right away
+ # regardless of whether extractor is geo bypassable or not
+ country = self._downloader.params.get('geo_bypass_country', None)
+
+ # Otherwise use random country code from geo bypass context but
+ # only if extractor is known as geo bypassable
+ if not country:
+ countries = geo_bypass_context.get('countries')
+ if self._GEO_BYPASS and countries:
+ country = random.choice(countries)
+
+ if country:
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country)
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
+ % (self._x_forwarded_for_ip, country.upper()))
+
+ def extract(self, url):
+ """Extracts URL information and returns it in list of dicts."""
+ try:
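+ # Make at most two attempts: on a GeoRestrictedError the second
+ # attempt retries with a faked X-Forwarded-For IP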
+ for _ in range(2):
+ try:
+ self.initialize()
+ ie_result = self._real_extract(url)
+ if self._x_forwarded_for_ip:
+ ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+ return ie_result
+ except GeoRestrictedError as e:
+ if self.__maybe_fake_ip_and_retry(e.countries):
+ continue
+ raise
+ except ExtractorError:
+ raise
+ except compat_http_client.IncompleteRead as e:
+ raise ExtractorError('A network error has occurred.', cause=e, expected=True)
+ except (KeyError, StopIteration) as e:
+ raise ExtractorError('An extractor error has occurred.', cause=e)
+
+ def __maybe_fake_ip_and_retry(self, countries):
+ if (not self._downloader.params.get('geo_bypass_country', None)
+ and self._GEO_BYPASS
+ and self._downloader.params.get('geo_bypass', True)
+ and not self._x_forwarded_for_ip
+ and countries):
+ country_code = random.choice(countries)
+ self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+ if self._x_forwarded_for_ip:
+ self.report_warning(
+ 'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
+ % (self._x_forwarded_for_ip, country_code.upper()))
+ return True
+ return False
+
+ def set_downloader(self, downloader):
+ """Sets the downloader for this IE."""
+ self._downloader = downloader
+
+ def _real_initialize(self):
+ """Real initialization process. Redefine in subclasses."""
+ pass
+
+ def _real_extract(self, url):
+ """Real extraction process. Redefine in subclasses."""
+ pass
+
+ @classmethod
+ def ie_key(cls):
+ """A string for getting the InfoExtractor with get_info_extractor"""
+ return compat_str(cls.__name__[:-2])
+
+ @property
+ def IE_NAME(self):
+ return compat_str(type(self).__name__[:-2])
+
+ @staticmethod
+ def __can_accept_status_code(err, expected_status):
+ assert isinstance(err, compat_urllib_error.HTTPError)
+ if expected_status is None:
+ return False
+ if isinstance(expected_status, compat_integer_types):
+ return err.code == expected_status
+ elif isinstance(expected_status, (list, tuple)):
+ return err.code in expected_status
+ elif callable(expected_status):
+ return expected_status(err.code) is True
+ else:
+ assert False
+
+ def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, data=None, headers={}, query={}, expected_status=None):
+ """
+ Return the response handle.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ if note is None:
+ self.report_download_webpage(video_id)
+ elif note is not False:
+ if video_id is None:
+ self.to_screen('%s' % (note,))
+ else:
+ self.to_screen('%s: %s' % (video_id, note))
+
+ # Some sites check the X-Forwarded-For HTTP header in order to figure
+ # out the origin of the client behind a proxy. This allows bypassing
+ # geo restriction by faking this header's value to an IP that belongs
+ # to some geo unrestricted country. We will do so once we encounter
+ # any geo restriction error.
+ if self._x_forwarded_for_ip:
+ if 'X-Forwarded-For' not in headers:
+ headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
+ if isinstance(url_or_request, compat_urllib_request.Request):
+ url_or_request = update_Request(
+ url_or_request, data=data, headers=headers, query=query)
+ else:
+ if query:
+ url_or_request = update_url_query(url_or_request, query)
+ if data is not None or headers:
+ url_or_request = sanitized_Request(url_or_request, data, headers)
+ try:
+ return self._downloader.urlopen(url_or_request)
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ if isinstance(err, compat_urllib_error.HTTPError):
+ if self.__can_accept_status_code(err, expected_status):
+ # Retain reference to error to prevent file object from
+ # being closed before it can be read. Works around the
+ # effects of <https://bugs.python.org/issue15002>
+ # introduced in Python 3.4.1.
+ err.fp._error = err
+ return err.fp
+
+ if errnote is False:
+ return False
+ if errnote is None:
+ errnote = 'Unable to download webpage'
+
+ errmsg = '%s: %s' % (errnote, error_to_compat_str(err))
+ if fatal:
+ raise ExtractorError(errmsg, sys.exc_info()[2], cause=err)
+ else:
+ self._downloader.report_warning(errmsg)
+ return False
+
+ def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None):
+ """
+ Return a tuple (page content as string, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ # Strip hashes from the URL (#1038)
+ if isinstance(url_or_request, (compat_str, str)):
+ url_or_request = url_or_request.partition('#')[0]
+
+ urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query, expected_status=expected_status)
+ if urlh is False:
+ assert not fatal
+ return False
+ content = self._webpage_read_content(urlh, url_or_request, video_id, note, errnote, fatal, encoding=encoding)
+ return (content, urlh)
+
+ @staticmethod
+ def _guess_encoding_from_content(content_type, webpage_bytes):
+ m = re.match(r'[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+\s*;\s*charset=(.+)', content_type)
+ if m:
+ encoding = m.group(1)
+ else:
+ m = re.search(br'<meta[^>]+charset=[\'"]?([^\'")]+)[ /\'">]',
+ webpage_bytes[:1024])
+ if m:
+ encoding = m.group(1).decode('ascii')
+ elif webpage_bytes.startswith(b'\xff\xfe'):
+ encoding = 'utf-16'
+ else:
+ encoding = 'utf-8'
+
+ return encoding
+
+ def __check_blocked(self, content):
+ first_block = content[:512]
+ if ('<title>Access to this site is blocked</title>' in content
+ and 'Websense' in first_block):
+ msg = 'Access to this webpage has been blocked by Websense filtering software in your network.'
+ blocked_iframe = self._html_search_regex(
+ r'<iframe src="([^"]+)"', content,
+ 'Websense information URL', default=None)
+ if blocked_iframe:
+ msg += ' Visit %s for more details' % blocked_iframe
+ raise ExtractorError(msg, expected=True)
+ if '<title>The URL you requested has been blocked</title>' in first_block:
+ msg = (
+ 'Access to this webpage has been blocked by Indian censorship. '
+ 'Use a VPN or proxy server (with --proxy) to route around it.')
+ block_msg = self._html_search_regex(
+ r'</h1><p>(.*?)</p>',
+ content, 'block message', default=None)
+ if block_msg:
+ msg += ' (Message: "%s")' % block_msg.replace('\n', ' ')
+ raise ExtractorError(msg, expected=True)
+ if ('<title>TTK :: Доступ к ресурсу ограничен</title>' in content
+ and 'blocklist.rkn.gov.ru' in content):
+ raise ExtractorError(
+ 'Access to this webpage has been blocked by decision of the Russian government. '
+ 'Visit http://blocklist.rkn.gov.ru/ for a block reason.',
+ expected=True)
+
+ def _webpage_read_content(self, urlh, url_or_request, video_id, note=None, errnote=None, fatal=True, prefix=None, encoding=None):
+ content_type = urlh.headers.get('Content-Type', '')
+ webpage_bytes = urlh.read()
+ if prefix is not None:
+ webpage_bytes = prefix + webpage_bytes
+ if not encoding:
+ encoding = self._guess_encoding_from_content(content_type, webpage_bytes)
+ if self._downloader.params.get('dump_intermediate_pages', False):
+ self.to_screen('Dumping request to ' + urlh.geturl())
+ dump = base64.b64encode(webpage_bytes).decode('ascii')
+ self._downloader.to_screen(dump)
+ if self._downloader.params.get('write_pages', False):
+ basen = '%s_%s' % (video_id, urlh.geturl())
+ if len(basen) > 240:
+ h = '___' + hashlib.md5(basen.encode('utf-8')).hexdigest()
+ basen = basen[:240 - len(h)] + h
+ raw_filename = basen + '.dump'
+ filename = sanitize_filename(raw_filename, restricted=True)
+ self.to_screen('Saving request to ' + filename)
+ # Working around MAX_PATH limitation on Windows (see
+ # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx)
+ if compat_os_name == 'nt':
+ absfilepath = os.path.abspath(filename)
+ if len(absfilepath) > 259:
+ filename = '\\\\?\\' + absfilepath
+ with open(filename, 'wb') as outf:
+ outf.write(webpage_bytes)
+
+ try:
+ content = webpage_bytes.decode(encoding, 'replace')
+ except LookupError:
+ content = webpage_bytes.decode('utf-8', 'replace')
+
+ self.__check_blocked(content)
+
+ return content
+
+ def _download_webpage(
+ self, url_or_request, video_id, note=None, errnote=None,
+ fatal=True, tries=1, timeout=5, encoding=None, data=None,
+ headers={}, query={}, expected_status=None):
+ """
+ Return the data of the page as a string.
+
+ Arguments:
+ url_or_request -- plain text URL as a string or
+ a compat_urllib_request.Request object
+ video_id -- Video/playlist/item identifier (string)
+
+ Keyword arguments:
+ note -- note printed before downloading (string)
+ errnote -- note printed in case of an error (string)
+ fatal -- flag denoting whether errors should be considered fatal,
+ i.e. whether they should cause ExtractorError to be raised;
+ otherwise a warning will be reported and extraction continued
+ tries -- number of tries
+ timeout -- sleep interval between tries
+ encoding -- encoding for a page content decoding, guessed automatically
+ when not explicitly specified
+ data -- POST data (bytes)
+ headers -- HTTP headers (dict)
+ query -- URL query (dict)
+ expected_status -- allows accepting failed HTTP requests (non-2xx
+ status codes) by explicitly specifying a set of accepted status
+ codes. Can be any of the following entities:
+ - an integer type specifying an exact failed status code to
+ accept
+ - a list or a tuple of integer types specifying a list of
+ failed status codes to accept
+ - a callable accepting an actual failed status code and
+ returning True if it should be accepted
+ Note that this argument does not affect success status codes (2xx)
+ which are always accepted.
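+
+ For example, to accept a 404 response and still receive the page
+ body, one might call:
+
+ webpage = self._download_webpage(
+ url, video_id, expected_status=404)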
+ """
+
+ success = False
+ try_count = 0
+ while success is False:
+ try:
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ success = True
+ except compat_http_client.IncompleteRead as e:
+ try_count += 1
+ if try_count >= tries:
+ raise e
+ self._sleep(timeout, video_id)
+ if res is False:
+ return res
+ else:
+ content, _ = res
+ return content
+
+ def _download_xml_handle(
+ self, url_or_request, video_id, note='Downloading XML',
+ errnote='Unable to download XML', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return a tuple (XML as a compat_etree_Element, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ if res is False:
+ return res
+ xml_string, urlh = res
+ return self._parse_xml(
+ xml_string, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
+ def _download_xml(
+ self, url_or_request, video_id,
+ note='Downloading XML', errnote='Unable to download XML',
+ transform_source=None, fatal=True, encoding=None,
+ data=None, headers={}, query={}, expected_status=None):
+ """
+ Return the XML as a compat_etree_Element.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_xml_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ return res if res is False else res[0]
+
+ def _parse_xml(self, xml_string, video_id, transform_source=None, fatal=True):
+ if transform_source:
+ xml_string = transform_source(xml_string)
+ try:
+ return compat_etree_fromstring(xml_string.encode('utf-8'))
+ except compat_xml_parse_error as ve:
+ errmsg = '%s: Failed to parse XML ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
+
+ def _download_json_handle(
+ self, url_or_request, video_id, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return a tuple (JSON object, URL handle).
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_webpage_handle(
+ url_or_request, video_id, note, errnote, fatal=fatal,
+ encoding=encoding, data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ if res is False:
+ return res
+ json_string, urlh = res
+ return self._parse_json(
+ json_string, video_id, transform_source=transform_source,
+ fatal=fatal), urlh
+
+ def _download_json(
+ self, url_or_request, video_id, note='Downloading JSON metadata',
+ errnote='Unable to download JSON metadata', transform_source=None,
+ fatal=True, encoding=None, data=None, headers={}, query={},
+ expected_status=None):
+ """
+ Return the JSON object as a dict.
+
+ See _download_webpage docstring for arguments specification.
+ """
+ res = self._download_json_handle(
+ url_or_request, video_id, note=note, errnote=errnote,
+ transform_source=transform_source, fatal=fatal, encoding=encoding,
+ data=data, headers=headers, query=query,
+ expected_status=expected_status)
+ return res if res is False else res[0]
+
+ def _parse_json(self, json_string, video_id, transform_source=None, fatal=True):
+ if transform_source:
+ json_string = transform_source(json_string)
+ try:
+ return json.loads(json_string)
+ except ValueError as ve:
+ errmsg = '%s: Failed to parse JSON ' % video_id
+ if fatal:
+ raise ExtractorError(errmsg, cause=ve)
+ else:
+ self.report_warning(errmsg + str(ve))
+
+ def report_warning(self, msg, video_id=None):
+ idstr = '' if video_id is None else '%s: ' % video_id
+ self._downloader.report_warning(
+ '[%s] %s%s' % (self.IE_NAME, idstr, msg))
+
+ def to_screen(self, msg):
+ """Print msg to screen, prefixing it with '[ie_name]'"""
+ self._downloader.to_screen('[%s] %s' % (self.IE_NAME, msg))
+
+ def report_extraction(self, id_or_name):
+ """Report information extraction."""
+ self.to_screen('%s: Extracting information' % id_or_name)
+
+ def report_download_webpage(self, video_id):
+ """Report webpage download."""
+ self.to_screen('%s: Downloading webpage' % video_id)
+
+ def report_age_confirmation(self):
+ """Report attempt to confirm age."""
+ self.to_screen('Confirming age')
+
+ def report_login(self):
+ """Report attempt to log in."""
+ self.to_screen('Logging in')
+
+ @staticmethod
+ def raise_login_required(msg='This video is only available for registered users'):
+ raise ExtractorError(
+ '%s. Use --username and --password or --netrc to provide account credentials.' % msg,
+ expected=True)
+
+ @staticmethod
+ def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
+ raise GeoRestrictedError(msg, countries=countries)
+
+ # Methods for following #608
+ @staticmethod
+ def url_result(url, ie=None, video_id=None, video_title=None):
+ """Returns a URL that points to a page that should be processed"""
+ # TODO: ie should be the class used for getting the info
+ video_info = {'_type': 'url',
+ 'url': url,
+ 'ie_key': ie}
+ if video_id is not None:
+ video_info['id'] = video_id
+ if video_title is not None:
+ video_info['title'] = video_title
+ return video_info
+
+ def playlist_from_matches(self, matches, playlist_id=None, playlist_title=None, getter=None, ie=None):
+ urls = orderedSet(
+ self.url_result(self._proto_relative_url(getter(m) if getter else m), ie)
+ for m in matches)
+ return self.playlist_result(
+ urls, playlist_id=playlist_id, playlist_title=playlist_title)
+
+ @staticmethod
+ def playlist_result(entries, playlist_id=None, playlist_title=None, playlist_description=None):
+ """Returns a playlist"""
+ video_info = {'_type': 'playlist',
+ 'entries': entries}
+ if playlist_id:
+ video_info['id'] = playlist_id
+ if playlist_title:
+ video_info['title'] = playlist_title
+ if playlist_description:
+ video_info['description'] = playlist_description
+ return video_info
+
+ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
+ """
+ Perform a regex search on the given string, using a single pattern or
+ a list of patterns, returning the first matching group.
+ In case of failure return a default value, report a warning or raise a
+ RegexNotFoundError, depending on default and fatal, specifying the field name.
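+
+ For example (made-up pattern and field name):
+
+ video_id = self._search_regex(
+ r'data-video-id=["\'](\d+)', webpage, 'video id')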
+ """
+ if isinstance(pattern, (str, compat_str, compiled_regex_type)):
+ mobj = re.search(pattern, string, flags)
+ else:
+ for p in pattern:
+ mobj = re.search(p, string, flags)
+ if mobj:
+ break
+
+ if not self._downloader.params.get('no_color') and compat_os_name != 'nt' and sys.stderr.isatty():
+ _name = '\033[0;34m%s\033[0m' % name
+ else:
+ _name = name
+
+ if mobj:
+ if group is None:
+ # return the first matching group
+ return next(g for g in mobj.groups() if g is not None)
+ else:
+ return mobj.group(group)
+ elif default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ raise RegexNotFoundError('Unable to extract %s' % _name)
+ else:
+ self._downloader.report_warning('unable to extract %s' % _name + bug_reports_message())
+ return None
+
+ def _html_search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, flags=0, group=None):
+ """
+ Like _search_regex, but strips HTML tags and unescapes entities.
+ """
+ res = self._search_regex(pattern, string, name, default, fatal, flags, group)
+ if res:
+ return clean_html(res).strip()
+ else:
+ return res
+
+ def _get_netrc_login_info(self, netrc_machine=None):
+ username = None
+ password = None
+ netrc_machine = netrc_machine or self._NETRC_MACHINE
+
+ if self._downloader.params.get('usenetrc', False):
+ try:
+ info = netrc.netrc().authenticators(netrc_machine)
+ if info is not None:
+ username = info[0]
+ password = info[2]
+ else:
+ raise netrc.NetrcParseError(
+ 'No authenticators for %s' % netrc_machine)
+ except (IOError, netrc.NetrcParseError) as err:
+ self._downloader.report_warning(
+ 'parsing .netrc: %s' % error_to_compat_str(err))
+
+ return username, password
+
+ def _get_login_info(self, username_option='username', password_option='password', netrc_machine=None):
+ """
+ Get the login info as (username, password).
+ First look for the manually specified credentials using username_option
+ and password_option as keys in the params dictionary. If no such
+ credentials are available, look in the netrc file using the
+ netrc_machine or _NETRC_MACHINE value.
+ If there's no info available, return (None, None).
+ """
+ if self._downloader is None:
+ return (None, None)
+
+ downloader_params = self._downloader.params
+
+ # Attempt to use provided username and password or .netrc data
+ if downloader_params.get(username_option) is not None:
+ username = downloader_params[username_option]
+ password = downloader_params[password_option]
+ else:
+ username, password = self._get_netrc_login_info(netrc_machine)
+
+ return username, password
+
+ def _get_tfa_info(self, note='two-factor verification code'):
+ """
+ Get the two-factor authentication info.
+ TODO: asking the user will be required for SMS/phone verification;
+ currently this just uses the command line option.
+ If there's no info available, return None.
+ """
+ if self._downloader is None:
+ return None
+ downloader_params = self._downloader.params
+
+ if downloader_params.get('twofactor') is not None:
+ return downloader_params['twofactor']
+
+ return compat_getpass('Type %s and press [Return]: ' % note)
+
+ # Helper functions for extracting OpenGraph info
+ @staticmethod
+ def _og_regexes(prop):
+ content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
+ property_re = (r'(?:name|property)=(?:\'og[:-]%(prop)s\'|"og[:-]%(prop)s"|\s*og[:-]%(prop)s\b)'
+ % {'prop': re.escape(prop)})
+ template = r'<meta[^>]+?%s[^>]+?%s'
+ return [
+ template % (property_re, content_re),
+ template % (content_re, property_re),
+ ]
+
+ @staticmethod
+ def _meta_regex(prop):
+ return r'''(?isx)<meta
+ (?=[^>]+(?:itemprop|name|property|id|http-equiv)=(["\']?)%s\1)
+ [^>]+?content=(["\'])(?P<content>.*?)\2''' % re.escape(prop)
+
+ def _og_search_property(self, prop, html, name=None, **kargs):
+ if not isinstance(prop, (list, tuple)):
+ prop = [prop]
+ if name is None:
+ name = 'OpenGraph %s' % prop[0]
+ og_regexes = []
+ for p in prop:
+ og_regexes.extend(self._og_regexes(p))
+ escaped = self._search_regex(og_regexes, html, name, flags=re.DOTALL, **kargs)
+ if escaped is None:
+ return None
+ return unescapeHTML(escaped)
+
+ def _og_search_thumbnail(self, html, **kargs):
+ return self._og_search_property('image', html, 'thumbnail URL', fatal=False, **kargs)
+
+ def _og_search_description(self, html, **kargs):
+ return self._og_search_property('description', html, fatal=False, **kargs)
+
+ def _og_search_title(self, html, **kargs):
+ return self._og_search_property('title', html, **kargs)
+
+ def _og_search_video_url(self, html, name='video url', secure=True, **kargs):
+ regexes = self._og_regexes('video') + self._og_regexes('video:url')
+ if secure:
+ regexes = self._og_regexes('video:secure_url') + regexes
+ return self._html_search_regex(regexes, html, name, **kargs)
+
+ def _og_search_url(self, html, **kargs):
+ return self._og_search_property('url', html, **kargs)
+
+ def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs):
+ if not isinstance(name, (list, tuple)):
+ name = [name]
+ if display_name is None:
+ display_name = name[0]
+ return self._html_search_regex(
+ [self._meta_regex(n) for n in name],
+ html, display_name, fatal=fatal, group='content', **kwargs)
+
+ def _dc_search_uploader(self, html):
+ return self._html_search_meta('dc.creator', html, 'uploader')
+
+ def _rta_search(self, html):
+ # See http://www.rtalabel.org/index.php?content=howtofaq#single
+ if re.search(r'(?ix)<meta\s+name="rating"\s+'
+ r' content="RTA-5042-1996-1400-1577-RTA"',
+ html):
+ return 18
+ return 0
+
+ def _media_rating_search(self, html):
+ # See http://www.tjg-designs.com/WP/metadata-code-examples-adding-metadata-to-your-web-pages/
+ rating = self._html_search_meta('rating', html)
+
+ if not rating:
+ return None
+
+ RATING_TABLE = {
+ 'safe for kids': 0,
+ 'general': 8,
+ '14 years': 14,
+ 'mature': 17,
+ 'restricted': 19,
+ }
+ return RATING_TABLE.get(rating.lower())
+
+ def _family_friendly_search(self, html):
+ # See http://schema.org/VideoObject
+ family_friendly = self._html_search_meta(
+ 'isFamilyFriendly', html, default=None)
+
+ if not family_friendly:
+ return None
+
+ RATING_TABLE = {
+ '1': 0,
+ 'true': 0,
+ '0': 18,
+ 'false': 18,
+ }
+ return RATING_TABLE.get(family_friendly.lower())
+
+ def _twitter_search_player(self, html):
+ return self._html_search_meta('twitter:player', html,
+ 'twitter card player')
+
+ def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
+ json_ld_list = list(re.finditer(JSON_LD_RE, html))
+ default = kwargs.get('default', NO_DEFAULT)
+ # JSON-LD may be malformed and thus `fatal` should be respected.
+ # At the same time `default` may be passed that assumes `fatal=False`
+ # for _search_regex. Let's simulate the same behavior here as well.
+ fatal = kwargs.get('fatal', True) if default == NO_DEFAULT else False
+ json_ld = []
+ for mobj in json_ld_list:
+ json_ld_item = self._parse_json(
+ mobj.group('json_ld'), video_id, fatal=fatal)
+ if not json_ld_item:
+ continue
+ if isinstance(json_ld_item, dict):
+ json_ld.append(json_ld_item)
+ elif isinstance(json_ld_item, (list, tuple)):
+ json_ld.extend(json_ld_item)
+ if json_ld:
+ json_ld = self._json_ld(json_ld, video_id, fatal=fatal, expected_type=expected_type)
+ if json_ld:
+ return json_ld
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ raise RegexNotFoundError('Unable to extract JSON-LD')
+ else:
+ self._downloader.report_warning('unable to extract JSON-LD %s' % bug_reports_message())
+ return {}
+
+ def _json_ld(self, json_ld, video_id, fatal=True, expected_type=None):
+ if isinstance(json_ld, compat_str):
+ json_ld = self._parse_json(json_ld, video_id, fatal=fatal)
+ if not json_ld:
+ return {}
+ info = {}
+ if not isinstance(json_ld, (list, tuple, dict)):
+ return info
+ if isinstance(json_ld, dict):
+ json_ld = [json_ld]
+
+ INTERACTION_TYPE_MAP = {
+ 'CommentAction': 'comment',
+ 'AgreeAction': 'like',
+ 'DisagreeAction': 'dislike',
+ 'LikeAction': 'like',
+ 'DislikeAction': 'dislike',
+ 'ListenAction': 'view',
+ 'WatchAction': 'view',
+ 'ViewAction': 'view',
+ }
+
+ def extract_interaction_statistic(e):
+ interaction_statistic = e.get('interactionStatistic')
+ if not isinstance(interaction_statistic, list):
+ return
+ for is_e in interaction_statistic:
+ if not isinstance(is_e, dict):
+ continue
+ if is_e.get('@type') != 'InteractionCounter':
+ continue
+ interaction_type = is_e.get('interactionType')
+ if not isinstance(interaction_type, compat_str):
+ continue
+ interaction_count = int_or_none(is_e.get('userInteractionCount'))
+ if interaction_count is None:
+ continue
+ count_kind = INTERACTION_TYPE_MAP.get(interaction_type.split('/')[-1])
+ if not count_kind:
+ continue
+ count_key = '%s_count' % count_kind
+ if info.get(count_key) is not None:
+ continue
+ info[count_key] = interaction_count
+
+ def extract_video_object(e):
+ assert e['@type'] == 'VideoObject'
+ info.update({
+ 'url': url_or_none(e.get('contentUrl')),
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('uploadDate')),
+ 'filesize': float_or_none(e.get('contentSize')),
+ 'tbr': int_or_none(e.get('bitrate')),
+ 'width': int_or_none(e.get('width')),
+ 'height': int_or_none(e.get('height')),
+ 'view_count': int_or_none(e.get('interactionCount')),
+ })
+ extract_interaction_statistic(e)
+
+ for e in json_ld:
+ if '@context' in e:
+ item_type = e.get('@type')
+ if expected_type is not None and expected_type != item_type:
+ continue
+ if item_type in ('TVEpisode', 'Episode'):
+ episode_name = unescapeHTML(e.get('name'))
+ info.update({
+ 'episode': episode_name,
+ 'episode_number': int_or_none(e.get('episodeNumber')),
+ 'description': unescapeHTML(e.get('description')),
+ })
+ if not info.get('title') and episode_name:
+ info['title'] = episode_name
+ part_of_season = e.get('partOfSeason')
+ if isinstance(part_of_season, dict) and part_of_season.get('@type') in ('TVSeason', 'Season', 'CreativeWorkSeason'):
+ info.update({
+ 'season': unescapeHTML(part_of_season.get('name')),
+ 'season_number': int_or_none(part_of_season.get('seasonNumber')),
+ })
+ part_of_series = e.get('partOfSeries') or e.get('partOfTVSeries')
+ if isinstance(part_of_series, dict) and part_of_series.get('@type') in ('TVSeries', 'Series', 'CreativeWorkSeries'):
+ info['series'] = unescapeHTML(part_of_series.get('name'))
+ elif item_type == 'Movie':
+ info.update({
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('dateCreated')),
+ })
+ elif item_type in ('Article', 'NewsArticle'):
+ info.update({
+ 'timestamp': parse_iso8601(e.get('datePublished')),
+ 'title': unescapeHTML(e.get('headline')),
+ 'description': unescapeHTML(e.get('articleBody')),
+ })
+ elif item_type == 'VideoObject':
+ extract_video_object(e)
+ if expected_type is None:
+ continue
+ else:
+ break
+ video = e.get('video')
+ if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ extract_video_object(video)
+ if expected_type is None:
+ continue
+ else:
+ break
+ return dict((k, v) for k, v in info.items() if v is not None)
+
+ @staticmethod
+ def _hidden_inputs(html):
+ html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
+ hidden_inputs = {}
+ for input in re.findall(r'(?i)(<input[^>]+>)', html):
+ attrs = extract_attributes(input)
+ if not attrs:
+ continue
+ if attrs.get('type') not in ('hidden', 'submit'):
+ continue
+ name = attrs.get('name') or attrs.get('id')
+ value = attrs.get('value')
+ if name and value is not None:
+ hidden_inputs[name] = value
+ return hidden_inputs
+
+ def _form_hidden_inputs(self, form_id, html):
+ form = self._search_regex(
+ r'(?is)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id,
+ html, '%s form' % form_id, group='form')
+ return self._hidden_inputs(form)
+
+ def _sort_formats(self, formats, field_preference=None):
+ if not formats:
+ raise ExtractorError('No video formats found')
+
+ for f in formats:
+ # Automatically determine tbr when missing based on abr and vbr (improves
+ # formats sorting in some cases)
+ if 'tbr' not in f and f.get('abr') is not None and f.get('vbr') is not None:
+ f['tbr'] = f['abr'] + f['vbr']
+
+ def _formats_key(f):
+ # TODO remove the following workaround
+ from ..utils import determine_ext
+ if not f.get('ext') and 'url' in f:
+ f['ext'] = determine_ext(f['url'])
+
+ if isinstance(field_preference, (list, tuple)):
+ return tuple(
+ f.get(field)
+ if f.get(field) is not None
+ else ('' if field == 'format_id' else -1)
+ for field in field_preference)
+
+ preference = f.get('preference')
+ if preference is None:
+ preference = 0
+ if f.get('ext') in ['f4f', 'f4m']: # Not yet supported
+ preference -= 0.5
+
+ protocol = f.get('protocol') or determine_protocol(f)
+ proto_preference = 0 if protocol in ['http', 'https'] else (-0.5 if protocol == 'rtsp' else -0.1)
+
+ if f.get('vcodec') == 'none': # audio only
+ preference -= 50
+ if self._downloader.params.get('prefer_free_formats'):
+ ORDER = ['aac', 'mp3', 'm4a', 'webm', 'ogg', 'opus']
+ else:
+ ORDER = ['webm', 'opus', 'ogg', 'mp3', 'aac', 'm4a']
+ ext_preference = 0
+ try:
+ audio_ext_preference = ORDER.index(f['ext'])
+ except ValueError:
+ audio_ext_preference = -1
+ else:
+ if f.get('acodec') == 'none': # video only
+ preference -= 40
+ if self._downloader.params.get('prefer_free_formats'):
+ ORDER = ['flv', 'mp4', 'webm']
+ else:
+ ORDER = ['webm', 'flv', 'mp4']
+ try:
+ ext_preference = ORDER.index(f['ext'])
+ except ValueError:
+ ext_preference = -1
+ audio_ext_preference = 0
+
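+ # formats.sort() is ascending, so larger key tuples sort later and
+ # the best format ends up last, which is the convention the rest of
+ # the code relies on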
+ return (
+ preference,
+ f.get('language_preference') if f.get('language_preference') is not None else -1,
+ f.get('quality') if f.get('quality') is not None else -1,
+ f.get('tbr') if f.get('tbr') is not None else -1,
+ f.get('filesize') if f.get('filesize') is not None else -1,
+ f.get('vbr') if f.get('vbr') is not None else -1,
+ f.get('height') if f.get('height') is not None else -1,
+ f.get('width') if f.get('width') is not None else -1,
+ proto_preference,
+ ext_preference,
+ f.get('abr') if f.get('abr') is not None else -1,
+ audio_ext_preference,
+ f.get('fps') if f.get('fps') is not None else -1,
+ f.get('filesize_approx') if f.get('filesize_approx') is not None else -1,
+ f.get('source_preference') if f.get('source_preference') is not None else -1,
+ f.get('format_id') if f.get('format_id') is not None else '',
+ )
+ formats.sort(key=_formats_key)
+
+ def _check_formats(self, formats, video_id):
+ if formats:
+ formats[:] = filter(
+ lambda f: self._is_valid_url(
+ f['url'], video_id,
+ item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'),
+ formats)
+
+ @staticmethod
+ def _remove_duplicate_formats(formats):
+ format_urls = set()
+ unique_formats = []
+ for f in formats:
+ if f['url'] not in format_urls:
+ format_urls.add(f['url'])
+ unique_formats.append(f)
+ formats[:] = unique_formats
+
+ def _is_valid_url(self, url, video_id, item='video', headers={}):
+ url = self._proto_relative_url(url, scheme='http:')
+ # For now, assume non-HTTP(S) URLs are always valid
+ if not (url.startswith('http://') or url.startswith('https://')):
+ return True
+ try:
+ self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
+ return True
+ except ExtractorError:
+ self.to_screen(
+ '%s: %s URL is invalid, skipping' % (video_id, item))
+ return False
+
+ def http_scheme(self):
+ """ Either "http:" or "https:", depending on the user's preferences """
+ return (
+ 'http:'
+ if self._downloader.params.get('prefer_insecure', False)
+ else 'https:')
+
+ def _proto_relative_url(self, url, scheme=None):
+ if url is None:
+ return url
+ if url.startswith('//'):
+ if scheme is None:
+ scheme = self.http_scheme()
+ return scheme + url
+ else:
+ return url
+
+ def _sleep(self, timeout, video_id, msg_template=None):
+ if msg_template is None:
+ msg_template = '%(video_id)s: Waiting for %(timeout)s seconds'
+ msg = msg_template % {'video_id': video_id, 'timeout': timeout}
+ self.to_screen(msg)
+ time.sleep(timeout)
+
+ def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=True, m3u8_id=None, data=None, headers={}, query={}):
+ manifest = self._download_xml(
+ manifest_url, video_id, 'Downloading f4m manifest',
+ 'Unable to download f4m manifest',
+ # Some manifests may be malformed, e.g. prosiebensat1 generated manifests
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244)
+ transform_source=transform_source,
+ fatal=fatal, data=data, headers=headers, query=query)
+
+ if manifest is False:
+ return []
+
+ return self._parse_f4m_formats(
+ manifest, manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+ transform_source=transform_source, fatal=fatal, m3u8_id=m3u8_id)
+
+ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None,
+ transform_source=lambda s: fix_xml_ampersands(s).strip(),
+ fatal=True, m3u8_id=None):
+ if not isinstance(manifest, compat_etree_Element) and not fatal:
+ return []
+
+ # youtube-dlc currently cannot decode the playerVerificationChallenge, as Akamai uses Adobe Alchemy
+ akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0')
+ if akamai_pv is not None and ';' in akamai_pv.text:
+ playerVerificationChallenge = akamai_pv.text.split(';')[0]
+ if playerVerificationChallenge.strip() != '':
+ return []
+
+ formats = []
+ manifest_version = '1.0'
+ media_nodes = manifest.findall('{http://ns.adobe.com/f4m/1.0}media')
+ if not media_nodes:
+ manifest_version = '2.0'
+ media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media')
+ # Remove unsupported DRM protected media from final formats
+ # rendition (see https://github.com/ytdl-org/youtube-dl/issues/8573).
+ media_nodes = remove_encrypted_media(media_nodes)
+ if not media_nodes:
+ return formats
+
+ manifest_base_url = get_base_url(manifest)
+
+ bootstrap_info = xpath_element(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
+ 'bootstrap info', default=None)
+
+ vcodec = None
+ mime_type = xpath_text(
+ manifest, ['{http://ns.adobe.com/f4m/1.0}mimeType', '{http://ns.adobe.com/f4m/2.0}mimeType'],
+ 'mime type', default=None)
+ if mime_type and mime_type.startswith('audio/'):
+ vcodec = 'none'
+
+ for i, media_el in enumerate(media_nodes):
+ tbr = int_or_none(media_el.attrib.get('bitrate'))
+ width = int_or_none(media_el.attrib.get('width'))
+ height = int_or_none(media_el.attrib.get('height'))
+ format_id = '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)]))
+ # If <bootstrapInfo> is present, the specified f4m is a
+ # stream-level manifest, and only set-level manifests may refer to
+ # external resources. See section 11.4 and section 4 of F4M spec
+ if bootstrap_info is None:
+ media_url = None
+ # @href is introduced in 2.0, see section 11.6 of F4M spec
+ if manifest_version == '2.0':
+ media_url = media_el.attrib.get('href')
+ if media_url is None:
+ media_url = media_el.attrib.get('url')
+ if not media_url:
+ continue
+ manifest_url = (
+ media_url if media_url.startswith('http://') or media_url.startswith('https://')
+ else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+ # If media_url is itself an f4m manifest, do the recursive extraction,
+ # since bitrates in the parent manifest (this one) and in the media_url
+ # manifest may differ, which would make the f4m downloader unable to
+ # resolve the format by the requested bitrate
+ ext = determine_ext(manifest_url)
+ if ext == 'f4m':
+ f4m_formats = self._extract_f4m_formats(
+ manifest_url, video_id, preference=preference, f4m_id=f4m_id,
+ transform_source=transform_source, fatal=fatal)
+ # Sometimes a stream-level manifest contains a single media entry that
+ # does not carry any quality metadata (e.g. http://matchtv.ru/#live-player).
+ # At the same time the parent's media entry in the set-level manifest may
+ # contain it, so we copy it from the parent in such cases.
+ if len(f4m_formats) == 1:
+ f = f4m_formats[0]
+ f.update({
+ 'tbr': f.get('tbr') or tbr,
+ 'width': f.get('width') or width,
+ 'height': f.get('height') or height,
+ 'format_id': f.get('format_id') if not tbr else format_id,
+ 'vcodec': vcodec,
+ })
+ formats.extend(f4m_formats)
+ continue
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', preference=preference,
+ m3u8_id=m3u8_id, fatal=fatal))
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': manifest_url,
+ 'manifest_url': manifest_url,
+ 'ext': 'flv' if bootstrap_info is not None else None,
+ 'protocol': 'f4m',
+ 'tbr': tbr,
+ 'width': width,
+ 'height': height,
+ 'vcodec': vcodec,
+ 'preference': preference,
+ })
+ return formats
+
+ def _m3u8_meta_format(self, m3u8_url, ext=None, preference=None, m3u8_id=None):
+ return {
+ 'format_id': '-'.join(filter(None, [m3u8_id, 'meta'])),
+ 'url': m3u8_url,
+ 'ext': ext,
+ 'protocol': 'm3u8',
+ 'preference': preference - 100 if preference else -100,
+ 'resolution': 'multiple',
+ 'format_note': 'Quality selection URL',
+ }
+
+ def _extract_m3u8_formats(self, m3u8_url, video_id, ext=None,
+ entry_protocol='m3u8', preference=None,
+ m3u8_id=None, note=None, errnote=None,
+ fatal=True, live=False, data=None, headers={},
+ query={}):
+ res = self._download_webpage_handle(
+ m3u8_url, video_id,
+ note=note or 'Downloading m3u8 information',
+ errnote=errnote or 'Failed to download m3u8 information',
+ fatal=fatal, data=data, headers=headers, query=query)
+
+ if res is False:
+ return []
+
+ m3u8_doc, urlh = res
+ m3u8_url = urlh.geturl()
+
+ return self._parse_m3u8_formats(
+ m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol,
+ preference=preference, m3u8_id=m3u8_id, live=live)
+
+ def _parse_m3u8_formats(self, m3u8_doc, m3u8_url, ext=None,
+ entry_protocol='m3u8', preference=None,
+ m3u8_id=None, live=False):
+ if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
+ return []
+
+ if re.search(r'#EXT-X-SESSION-KEY:.*?URI="skd://', m3u8_doc): # Apple FairPlay
+ return []
+
+ formats = []
+
+ format_url = lambda u: (
+ u
+ if re.match(r'^https?://', u)
+ else compat_urlparse.urljoin(m3u8_url, u))
+
+ # References:
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-21
+ # 2. https://github.com/ytdl-org/youtube-dl/issues/12211
+ # 3. https://github.com/ytdl-org/youtube-dl/issues/18923
+
+ # We should try extracting formats only from master playlists [1, 4.3.4],
+ # i.e. playlists that describe available qualities. On the other hand
+ # media playlists [1, 4.3.3] should be returned as is since they contain
+ # just the media without qualities renditions.
+ # Fortunately, master playlist can be easily distinguished from media
+ # playlist based on particular tags availability. As of [1, 4.3.3, 4.3.4]
+ # master playlist tags MUST NOT appear in a media playlist and vice versa.
+ # As of [1, 4.3.3.1] #EXT-X-TARGETDURATION tag is REQUIRED for every
+ # media playlist and MUST NOT appear in master playlist thus we can
+ # clearly detect media playlist with this criterion.
+
+ if '#EXT-X-TARGETDURATION' in m3u8_doc: # media playlist, return as is
+ return [{
+ 'url': m3u8_url,
+ 'format_id': m3u8_id,
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }]
+
+ groups = {}
+ last_stream_inf = {}
+
+ def extract_media(x_media_line):
+ media = parse_m3u8_attributes(x_media_line)
+ # As per [1, 4.3.4.1] TYPE, GROUP-ID and NAME are REQUIRED
+ media_type, group_id, name = media.get('TYPE'), media.get('GROUP-ID'), media.get('NAME')
+ if not (media_type and group_id and name):
+ return
+ groups.setdefault(group_id, []).append(media)
+ if media_type not in ('VIDEO', 'AUDIO'):
+ return
+ media_url = media.get('URI')
+ if media_url:
+ format_id = []
+ for v in (m3u8_id, group_id, name):
+ if v:
+ format_id.append(v)
+ f = {
+ 'format_id': '-'.join(format_id),
+ 'url': format_url(media_url),
+ 'manifest_url': m3u8_url,
+ 'language': media.get('LANGUAGE'),
+ 'ext': ext,
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }
+ if media_type == 'AUDIO':
+ f['vcodec'] = 'none'
+ formats.append(f)
+
+ def build_stream_name():
+ # Although the specification does not mention a NAME attribute for the
+ # EXT-X-STREAM-INF tag, it may still be present in practice (see [1]
+ # or vidio test in TestInfoExtractor.test_parse_m3u8_formats)
+ # 1. http://www.vidio.com/watch/165683-dj_ambred-booyah-live-2015
+ stream_name = last_stream_inf.get('NAME')
+ if stream_name:
+ return stream_name
+ # If there is no NAME in EXT-X-STREAM-INF it will be obtained
+ # from corresponding rendition group
+ stream_group_id = last_stream_inf.get('VIDEO')
+ if not stream_group_id:
+ return
+ stream_group = groups.get(stream_group_id)
+ if not stream_group:
+ return stream_group_id
+ rendition = stream_group[0]
+ return rendition.get('NAME') or stream_group_id
+
+ # Parse EXT-X-MEDIA tags before EXT-X-STREAM-INF in order to have a
+ # chance to detect video-only formats when EXT-X-STREAM-INF tags
+ # precede EXT-X-MEDIA tags in HLS manifests such as [3].
+ for line in m3u8_doc.splitlines():
+ if line.startswith('#EXT-X-MEDIA:'):
+ extract_media(line)
+
+ for line in m3u8_doc.splitlines():
+ if line.startswith('#EXT-X-STREAM-INF:'):
+ last_stream_inf = parse_m3u8_attributes(line)
+ elif line.startswith('#') or not line.strip():
+ continue
+ else:
+ tbr = float_or_none(
+ last_stream_inf.get('AVERAGE-BANDWIDTH')
+ or last_stream_inf.get('BANDWIDTH'), scale=1000)
+ format_id = []
+ if m3u8_id:
+ format_id.append(m3u8_id)
+ stream_name = build_stream_name()
+ # The bandwidth of live streams may differ over time, thus making
+ # format_id unpredictable, so it's better to keep the provided
+ # format_id intact.
+ if not live:
+ format_id.append(stream_name if stream_name else '%d' % (tbr if tbr else len(formats)))
+ manifest_url = format_url(line.strip())
+ f = {
+ 'format_id': '-'.join(format_id),
+ 'url': manifest_url,
+ 'manifest_url': m3u8_url,
+ 'tbr': tbr,
+ 'ext': ext,
+ 'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
+ 'protocol': entry_protocol,
+ 'preference': preference,
+ }
+ resolution = last_stream_inf.get('RESOLUTION')
+ if resolution:
+ mobj = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', resolution)
+ if mobj:
+ f['width'] = int(mobj.group('width'))
+ f['height'] = int(mobj.group('height'))
+ # Unified Streaming Platform
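+ # e.g. (illustrative) a USP URL containing
+ # ...-audio_eng=128000-video=1500000.m3u8 encodes abr=128 and
+ # vbr=1500 (in kbps) directly in the URL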
+ mobj = re.search(
+ r'audio.*?(?:%3D|=)(\d+)(?:-video.*?(?:%3D|=)(\d+))?', f['url'])
+ if mobj:
+ abr, vbr = mobj.groups()
+ abr, vbr = float_or_none(abr, 1000), float_or_none(vbr, 1000)
+ f.update({
+ 'vbr': vbr,
+ 'abr': abr,
+ })
+ codecs = parse_codecs(last_stream_inf.get('CODECS'))
+ f.update(codecs)
+ audio_group_id = last_stream_inf.get('AUDIO')
+ # As per [1, 4.3.4.1.1] any EXT-X-STREAM-INF tag which
+ # references a rendition group MUST have a CODECS attribute.
+ # However, this is not always respected, for example, [2]
+ # contains EXT-X-STREAM-INF tag which references AUDIO
+ # rendition group but does not have CODECS and despite
+ # referencing an audio group it represents a complete
+ # (with audio and video) format. So, for such cases we will
+ # ignore references to rendition groups and treat them
+ # as complete formats.
+ if audio_group_id and codecs and f.get('vcodec') != 'none':
+ audio_group = groups.get(audio_group_id)
+ if audio_group and audio_group[0].get('URI'):
+ # TODO: update acodec for audio only formats with
+ # the same GROUP-ID
+ f['acodec'] = 'none'
+ formats.append(f)
+
+ # for DailyMotion
+ progressive_uri = last_stream_inf.get('PROGRESSIVE-URI')
+ if progressive_uri:
+ http_f = f.copy()
+ del http_f['manifest_url']
+ http_f.update({
+ 'format_id': f['format_id'].replace('hls-', 'http-'),
+ 'protocol': 'http',
+ 'url': progressive_uri,
+ })
+ formats.append(http_f)
+
+ last_stream_inf = {}
+ return formats
+
+ @staticmethod
+ def _xpath_ns(path, namespace=None):
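+ # Prefix each path component with the namespace in Clark notation,
+ # e.g. (illustrative) _xpath_ns('./head/meta', 'urn:ns')
+ # -> './{urn:ns}head/{urn:ns}meta'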
+ if not namespace:
+ return path
+ out = []
+ for c in path.split('/'):
+ if not c or c == '.':
+ out.append(c)
+ else:
+ out.append('{%s}%s' % (namespace, c))
+ return '/'.join(out)
+
+ def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None, transform_source=None):
+ smil = self._download_smil(smil_url, video_id, fatal=fatal, transform_source=transform_source)
+
+ if smil is False:
+ assert not fatal
+ return []
+
+ namespace = self._parse_smil_namespace(smil)
+
+ return self._parse_smil_formats(
+ smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+
+ def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
+ smil = self._download_smil(smil_url, video_id, fatal=fatal)
+ if smil is False:
+ return {}
+ return self._parse_smil(smil, smil_url, video_id, f4m_params=f4m_params)
+
+ def _download_smil(self, smil_url, video_id, fatal=True, transform_source=None):
+ return self._download_xml(
+ smil_url, video_id, 'Downloading SMIL file',
+ 'Unable to download SMIL file', fatal=fatal, transform_source=transform_source)
+
+ def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
+ namespace = self._parse_smil_namespace(smil)
+
+ formats = self._parse_smil_formats(
+ smil, smil_url, video_id, namespace=namespace, f4m_params=f4m_params)
+ subtitles = self._parse_smil_subtitles(smil, namespace=namespace)
+
+ video_id = os.path.splitext(url_basename(smil_url))[0]
+ title = None
+ description = None
+ upload_date = None
+ for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+ name = meta.attrib.get('name')
+ content = meta.attrib.get('content')
+ if not name or not content:
+ continue
+ if not title and name == 'title':
+ title = content
+ elif not description and name in ('description', 'abstract'):
+ description = content
+ elif not upload_date and name == 'date':
+ upload_date = unified_strdate(content)
+
+ thumbnails = [{
+ 'id': image.get('type'),
+ 'url': image.get('src'),
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in smil.findall(self._xpath_ns('.//image', namespace)) if image.get('src')]
+
+ return {
+ 'id': video_id,
+ 'title': title or video_id,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _parse_smil_namespace(self, smil):
+ return self._search_regex(
+ r'(?i)^{([^}]+)?}smil$', smil.tag, 'namespace', default=None)
+
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ base = smil_url
+ for meta in smil.findall(self._xpath_ns('./head/meta', namespace)):
+ b = meta.get('base') or meta.get('httpBase')
+ if b:
+ base = b
+ break
+
+ formats = []
+ rtmp_count = 0
+ http_count = 0
+ m3u8_count = 0
+
+ srcs = []
+ media = smil.findall(self._xpath_ns('.//video', namespace)) + smil.findall(self._xpath_ns('.//audio', namespace))
+ for medium in media:
+ src = medium.get('src')
+ if not src or src in srcs:
+ continue
+ srcs.append(src)
+
+ bitrate = float_or_none(medium.get('system-bitrate') or medium.get('systemBitrate'), 1000)
+ filesize = int_or_none(medium.get('size') or medium.get('fileSize'))
+ width = int_or_none(medium.get('width'))
+ height = int_or_none(medium.get('height'))
+ proto = medium.get('proto')
+ ext = medium.get('ext')
+ src_ext = determine_ext(src)
+ streamer = medium.get('streamer') or base
+
+ if proto == 'rtmp' or streamer.startswith('rtmp'):
+ rtmp_count += 1
+ formats.append({
+ 'url': streamer,
+ 'play_path': src,
+ 'ext': 'flv',
+ 'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ })
+ if transform_rtmp_url:
+ streamer, src = transform_rtmp_url(streamer, src)
+ formats[-1].update({
+ 'url': streamer,
+ 'play_path': src,
+ })
+ continue
+
+ src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
+ src_url = src_url.strip()
+
+ if proto == 'm3u8' or src_ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src_url, video_id, ext or 'mp4', m3u8_id='hls', fatal=False)
+ if len(m3u8_formats) == 1:
+ m3u8_count += 1
+ m3u8_formats[0].update({
+ 'format_id': 'hls-%d' % (m3u8_count if bitrate is None else bitrate),
+ 'tbr': bitrate,
+ 'width': width,
+ 'height': height,
+ })
+ formats.extend(m3u8_formats)
+ elif src_ext == 'f4m':
+ f4m_url = src_url
+ if not f4m_params:
+ f4m_params = {
+ 'hdcore': '3.2.0',
+ 'plugin': 'flowplayer-3.2.0.1',
+ }
+ f4m_url += '&' if '?' in f4m_url else '?'
+ f4m_url += compat_urllib_parse_urlencode(f4m_params)
+ formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds', fatal=False))
+ elif src_ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src_url, video_id, mpd_id='dash', fatal=False))
+ elif re.search(r'\.ism/[Mm]anifest', src_url):
+ formats.extend(self._extract_ism_formats(
+ src_url, video_id, ism_id='mss', fatal=False))
+ elif src_url.startswith('http') and self._is_valid_url(src_url, video_id):
+ http_count += 1
+ formats.append({
+ 'url': src_url,
+ 'ext': ext or src_ext or 'flv',
+ 'format_id': 'http-%d' % (bitrate or http_count),
+ 'tbr': bitrate,
+ 'filesize': filesize,
+ 'width': width,
+ 'height': height,
+ })
+
+ return formats
+
+ def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+ urls = []
+ subtitles = {}
+ for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
+ src = textstream.get('src')
+ if not src or src in urls:
+ continue
+ urls.append(src)
+ ext = textstream.get('ext') or mimetype2ext(textstream.get('type')) or determine_ext(src)
+ lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName') or textstream.get('lang') or subtitles_lang
+ subtitles.setdefault(lang, []).append({
+ 'url': src,
+ 'ext': ext,
+ })
+ return subtitles
+
+ def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
+ xspf = self._download_xml(
+ xspf_url, playlist_id, 'Downloading xspf playlist',
+ 'Unable to download xspf manifest', fatal=fatal)
+ if xspf is False:
+ return []
+ return self._parse_xspf(
+ xspf, playlist_id, xspf_url=xspf_url,
+ xspf_base_url=base_url(xspf_url))
+
+ def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
+ NS_MAP = {
+ 'xspf': 'http://xspf.org/ns/0/',
+ 's1': 'http://static.streamone.nl/player/ns/0',
+ }
+
+ entries = []
+ for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
+ title = xpath_text(
+ track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
+ description = xpath_text(
+ track, xpath_with_ns('./xspf:annotation', NS_MAP), 'description')
+ thumbnail = xpath_text(
+ track, xpath_with_ns('./xspf:image', NS_MAP), 'thumbnail')
+ duration = float_or_none(
+ xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
+
+ formats = []
+ for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
+ format_url = urljoin(xspf_base_url, location.text)
+ if not format_url:
+ continue
+ formats.append({
+ 'url': format_url,
+ 'manifest_url': xspf_url,
+ 'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
+ 'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
+ 'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
+ })
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': playlist_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ })
+ return entries
+
+ def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}, data=None, headers={}, query={}):
+ res = self._download_xml_handle(
+ mpd_url, video_id,
+ note=note or 'Downloading MPD manifest',
+ errnote=errnote or 'Failed to download MPD manifest',
+ fatal=fatal, data=data, headers=headers, query=query)
+ if res is False:
+ return []
+ mpd_doc, urlh = res
+ if mpd_doc is None:
+ return []
+ mpd_base_url = base_url(urlh.geturl())
+
+ return self._parse_mpd_formats(
+ mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
+ formats_dict=formats_dict, mpd_url=mpd_url)
+
+ def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
+ """
+ Parse formats from MPD manifest.
+ References:
+ 1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E),
+ http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip
+ 2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP
+ """
+ if mpd_doc.get('type') == 'dynamic':
+ return []
+
+ namespace = self._search_regex(r'(?i)^{([^}]+)?}MPD$', mpd_doc.tag, 'namespace', default=None)
+
+ def _add_ns(path):
+ return self._xpath_ns(path, namespace)
+
+ def is_drm_protected(element):
+ return element.find(_add_ns('ContentProtection')) is not None
+
+ def extract_multisegment_info(element, ms_parent_info):
+ ms_info = ms_parent_info.copy()
+
+ # As per [1, 5.3.9.2.2] SegmentList and SegmentTemplate share some
+ # common attributes and elements. We only extract the ones that are
+ # relevant for us.
+ def extract_common(source):
+ segment_timeline = source.find(_add_ns('SegmentTimeline'))
+ if segment_timeline is not None:
+ s_e = segment_timeline.findall(_add_ns('S'))
+ if s_e:
+ ms_info['total_number'] = 0
+ ms_info['s'] = []
+ for s in s_e:
+ r = int(s.get('r', 0))
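+ # @r is the number of additional repetitions, so a single
+ # S element accounts for r + 1 segments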
+ ms_info['total_number'] += 1 + r
+ ms_info['s'].append({
+ 't': int(s.get('t', 0)),
+ # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60])
+ 'd': int(s.attrib['d']),
+ 'r': r,
+ })
+ start_number = source.get('startNumber')
+ if start_number:
+ ms_info['start_number'] = int(start_number)
+ timescale = source.get('timescale')
+ if timescale:
+ ms_info['timescale'] = int(timescale)
+ segment_duration = source.get('duration')
+ if segment_duration:
+ ms_info['segment_duration'] = float(segment_duration)
+
+ def extract_Initialization(source):
+ initialization = source.find(_add_ns('Initialization'))
+ if initialization is not None:
+ ms_info['initialization_url'] = initialization.attrib['sourceURL']
+
+ segment_list = element.find(_add_ns('SegmentList'))
+ if segment_list is not None:
+ extract_common(segment_list)
+ extract_Initialization(segment_list)
+ segment_urls_e = segment_list.findall(_add_ns('SegmentURL'))
+ if segment_urls_e:
+ ms_info['segment_urls'] = [segment.attrib['media'] for segment in segment_urls_e]
+ else:
+ segment_template = element.find(_add_ns('SegmentTemplate'))
+ if segment_template is not None:
+ extract_common(segment_template)
+ media = segment_template.get('media')
+ if media:
+ ms_info['media'] = media
+ initialization = segment_template.get('initialization')
+ if initialization:
+ ms_info['initialization'] = initialization
+ else:
+ extract_Initialization(segment_template)
+ return ms_info
+
+ mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
+ formats = []
+ for period in mpd_doc.findall(_add_ns('Period')):
+ period_duration = parse_duration(period.get('duration')) or mpd_duration
+ period_ms_info = extract_multisegment_info(period, {
+ 'start_number': 1,
+ 'timescale': 1,
+ })
+ for adaptation_set in period.findall(_add_ns('AdaptationSet')):
+ if is_drm_protected(adaptation_set):
+ continue
+ adaption_set_ms_info = extract_multisegment_info(adaptation_set, period_ms_info)
+ for representation in adaptation_set.findall(_add_ns('Representation')):
+ if is_drm_protected(representation):
+ continue
+ representation_attrib = adaptation_set.attrib.copy()
+ representation_attrib.update(representation.attrib)
+ # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory
+ mime_type = representation_attrib['mimeType']
+ content_type = mime_type.split('/')[0]
+ if content_type == 'text':
+ # TODO implement WebVTT downloading
+ pass
+ elif content_type in ('video', 'audio'):
+ base_url = ''
+ for element in (representation, adaptation_set, period, mpd_doc):
+ base_url_e = element.find(_add_ns('BaseURL'))
+ if base_url_e is not None:
+ base_url = base_url_e.text + base_url
+ if re.match(r'^https?://', base_url):
+ break
+ if mpd_base_url and not re.match(r'^https?://', base_url):
+ if not mpd_base_url.endswith('/') and not base_url.startswith('/'):
+ mpd_base_url += '/'
+ base_url = mpd_base_url + base_url
+ representation_id = representation_attrib.get('id')
+ lang = representation_attrib.get('lang')
+ url_el = representation.find(_add_ns('BaseURL'))
+ filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+ bandwidth = int_or_none(representation_attrib.get('bandwidth'))
+ f = {
+ 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
+ 'manifest_url': mpd_url,
+ 'ext': mimetype2ext(mime_type),
+ 'width': int_or_none(representation_attrib.get('width')),
+ 'height': int_or_none(representation_attrib.get('height')),
+ 'tbr': float_or_none(bandwidth, 1000),
+ 'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
+ 'fps': int_or_none(representation_attrib.get('frameRate')),
+ 'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
+ 'format_note': 'DASH %s' % content_type,
+ 'filesize': filesize,
+ 'container': mimetype2ext(mime_type) + '_dash',
+ }
+ f.update(parse_codecs(representation_attrib.get('codecs')))
+ representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
+
+ def prepare_template(template_name, identifiers):
+ tmpl = representation_ms_info[template_name]
+ # First of all, % characters outside $...$ templates
+ # must be escaped by doubling for proper processing
+ # by % operator string formatting used further (see
+ # https://github.com/ytdl-org/youtube-dl/issues/16867).
+ t = ''
+ in_template = False
+ for c in tmpl:
+ t += c
+ if c == '$':
+ in_template = not in_template
+ elif c == '%' and not in_template:
+ t += c
+ # Next, $...$ templates are translated to their
+ # %(...) counterparts to be used with % operator
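+ # e.g. (illustrative) 'seg-$Number%05d$.m4s' becomes
+ # 'seg-%(Number)05d.m4s', so that t % {'Number': 42}
+ # yields 'seg-00042.m4s'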
+ t = t.replace('$RepresentationID$', representation_id)
+ t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+ t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
+ # str.replace returns a new string, so the result must be assigned back
+ t = t.replace('$$', '$')
+ return t
+
+ # @initialization is a regular template like @media one
+ # so it should be handled just the same way (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11605)
+ if 'initialization' in representation_ms_info:
+ initialization_template = prepare_template(
+ 'initialization',
+ # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+ # $Time$ shall not be included for @initialization thus
+ # only $Bandwidth$ remains
+ ('Bandwidth', ))
+ representation_ms_info['initialization_url'] = initialization_template % {
+ 'Bandwidth': bandwidth,
+ }
+
+ def location_key(location):
+ return 'url' if re.match(r'^https?://', location) else 'path'
+
+ if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+
+ media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+ media_location_key = location_key(media_template)
+
+ # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
+ # can't be used at the same time
+ if '%(Number' in media_template and 's' not in representation_ms_info:
+ segment_duration = None
+ if 'total_number' not in representation_ms_info and 'segment_duration' in representation_ms_info:
+ segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
+ representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
+ representation_ms_info['fragments'] = [{
+ media_location_key: media_template % {
+ 'Number': segment_number,
+ 'Bandwidth': bandwidth,
+ },
+ 'duration': segment_duration,
+ } for segment_number in range(
+ representation_ms_info['start_number'],
+ representation_ms_info['total_number'] + representation_ms_info['start_number'])]
+ else:
+ # $Number*$ or $Time$ in media template with S list available
+ # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg
+ # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411
+ representation_ms_info['fragments'] = []
+ segment_time = 0
+ segment_d = None
+ segment_number = representation_ms_info['start_number']
+
+ def add_segment_url():
+ segment_url = media_template % {
+ 'Time': segment_time,
+ 'Bandwidth': bandwidth,
+ 'Number': segment_number,
+ }
+ representation_ms_info['fragments'].append({
+ media_location_key: segment_url,
+ 'duration': float_or_none(segment_d, representation_ms_info['timescale']),
+ })
+
+ for num, s in enumerate(representation_ms_info['s']):
+ segment_time = s.get('t') or segment_time
+ segment_d = s['d']
+ add_segment_url()
+ segment_number += 1
+ for r in range(s.get('r', 0)):
+ segment_time += segment_d
+ add_segment_url()
+ segment_number += 1
+ segment_time += segment_d
+ elif 'segment_urls' in representation_ms_info and 's' in representation_ms_info:
+ # No media template
+ # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
+ # or any YouTube dashsegments video
+ fragments = []
+ segment_index = 0
+ timescale = representation_ms_info['timescale']
+ for s in representation_ms_info['s']:
+ duration = float_or_none(s['d'], timescale)
+ for r in range(s.get('r', 0) + 1):
+ segment_uri = representation_ms_info['segment_urls'][segment_index]
+ fragments.append({
+ location_key(segment_uri): segment_uri,
+ 'duration': duration,
+ })
+ segment_index += 1
+ representation_ms_info['fragments'] = fragments
+ elif 'segment_urls' in representation_ms_info:
+ # Segment URLs with no SegmentTimeline
+ # Example: https://www.seznam.cz/zpravy/clanek/cesko-zasahne-vitr-o-sile-vichrice-muze-byt-i-zivotu-nebezpecny-39091
+ # https://github.com/ytdl-org/youtube-dl/pull/14844
+ fragments = []
+ segment_duration = float_or_none(
+ representation_ms_info['segment_duration'],
+ representation_ms_info['timescale']) if 'segment_duration' in representation_ms_info else None
+ for segment_url in representation_ms_info['segment_urls']:
+ fragment = {
+ location_key(segment_url): segment_url,
+ }
+ if segment_duration:
+ fragment['duration'] = segment_duration
+ fragments.append(fragment)
+ representation_ms_info['fragments'] = fragments
+ # If there is a fragments key available then we correctly recognized fragmented media.
+ # Otherwise we will assume unfragmented media with direct access. Technically, such
+ # an assumption is not necessarily correct since we may simply have no support for
+ # some forms of fragmented media renditions yet, but for now we'll use this fallback.
+ if 'fragments' in representation_ms_info:
+ f.update({
+ # NB: mpd_url may be empty when MPD manifest is parsed from a string
+ 'url': mpd_url or base_url,
+ 'fragment_base_url': base_url,
+ 'fragments': [],
+ 'protocol': 'http_dash_segments',
+ })
+ if 'initialization_url' in representation_ms_info:
+ initialization_url = representation_ms_info['initialization_url']
+ if not f.get('url'):
+ f['url'] = initialization_url
+ f['fragments'].append({location_key(initialization_url): initialization_url})
+ f['fragments'].extend(representation_ms_info['fragments'])
+ else:
+ # Assuming direct URL to unfragmented media.
+ f['url'] = base_url
+
+ # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
+ # is not necessarily unique within a Period thus formats with
+ # the same `format_id` are quite possible. There are numerous examples
+ # of such manifests (see https://github.com/ytdl-org/youtube-dl/issues/15111,
+ # https://github.com/ytdl-org/youtube-dl/issues/13919)
+ full_info = formats_dict.get(representation_id, {}).copy()
+ full_info.update(f)
+ formats.append(full_info)
+ else:
+ self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
+ return formats
+
+ def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True, data=None, headers={}, query={}):
+ res = self._download_xml_handle(
+ ism_url, video_id,
+ note=note or 'Downloading ISM manifest',
+ errnote=errnote or 'Failed to download ISM manifest',
+ fatal=fatal, data=data, headers=headers, query=query)
+ if res is False:
+ return []
+ ism_doc, urlh = res
+ if ism_doc is None:
+ return []
+
+ return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
+
+ def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+ """
+ Parse formats from ISM manifest.
+ References:
+ 1. [MS-SSTR]: Smooth Streaming Protocol,
+ https://msdn.microsoft.com/en-us/library/ff469518.aspx
+ """
+ if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
+ return []
+
+ duration = int(ism_doc.attrib['Duration'])
+ timescale = int_or_none(ism_doc.get('TimeScale')) or 10000000
+
+ formats = []
+ for stream in ism_doc.findall('StreamIndex'):
+ stream_type = stream.get('Type')
+ if stream_type not in ('video', 'audio'):
+ continue
+ url_pattern = stream.attrib['Url']
+ stream_timescale = int_or_none(stream.get('TimeScale')) or timescale
+ stream_name = stream.get('Name')
+ for track in stream.findall('QualityLevel'):
+ fourcc = track.get('FourCC', 'AACL' if track.get('AudioTag') == '255' else None)
+ # TODO: add support for WVC1 and WMAP
+ if fourcc not in ('H264', 'AVC1', 'AACL'):
+ self.report_warning('%s is not a supported codec' % fourcc)
+ continue
+ tbr = int(track.attrib['Bitrate']) // 1000
+ # [1] does not mention Width and Height attributes. However,
+ # they're often present while MaxWidth and MaxHeight are
+ # missing, so should be used as fallbacks
+ width = int_or_none(track.get('MaxWidth') or track.get('Width'))
+ height = int_or_none(track.get('MaxHeight') or track.get('Height'))
+ sampling_rate = int_or_none(track.get('SamplingRate'))
+
+ track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
+ track_url_pattern = compat_urlparse.urljoin(ism_url, track_url_pattern)
+
+ fragments = []
+ fragment_ctx = {
+ 'time': 0,
+ }
+ stream_fragments = stream.findall('c')
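+ # Each <c> element describes one or more fragments: @t is the
+ # start time, @d the duration and @r the number of contiguous
+ # fragments sharing that duration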
+ for stream_fragment_index, stream_fragment in enumerate(stream_fragments):
+ fragment_ctx['time'] = int_or_none(stream_fragment.get('t')) or fragment_ctx['time']
+ fragment_repeat = int_or_none(stream_fragment.get('r')) or 1
+ fragment_ctx['duration'] = int_or_none(stream_fragment.get('d'))
+ if not fragment_ctx['duration']:
+ try:
+ next_fragment_time = int(stream_fragments[stream_fragment_index + 1].attrib['t'])
+ except IndexError:
+ next_fragment_time = duration
+ fragment_ctx['duration'] = (next_fragment_time - fragment_ctx['time']) / fragment_repeat
+ for _ in range(fragment_repeat):
+ fragments.append({
+ 'url': re.sub(r'{start[ _]time}', compat_str(fragment_ctx['time']), track_url_pattern),
+ 'duration': fragment_ctx['duration'] / stream_timescale,
+ })
+ fragment_ctx['time'] += fragment_ctx['duration']
+
+ format_id = []
+ if ism_id:
+ format_id.append(ism_id)
+ if stream_name:
+ format_id.append(stream_name)
+ format_id.append(compat_str(tbr))
+
+ formats.append({
+ 'format_id': '-'.join(format_id),
+ 'url': ism_url,
+ 'manifest_url': ism_url,
+ 'ext': 'ismv' if stream_type == 'video' else 'isma',
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'asr': sampling_rate,
+ 'vcodec': 'none' if stream_type == 'audio' else fourcc,
+ 'acodec': 'none' if stream_type == 'video' else fourcc,
+ 'protocol': 'ism',
+ 'fragments': fragments,
+ '_download_params': {
+ 'duration': duration,
+ 'timescale': stream_timescale,
+ 'width': width or 0,
+ 'height': height or 0,
+ 'fourcc': fourcc,
+ 'codec_private_data': track.get('CodecPrivateData'),
+ 'sampling_rate': sampling_rate,
+ 'channels': int_or_none(track.get('Channels', 2)),
+ 'bits_per_sample': int_or_none(track.get('BitsPerSample', 16)),
+ 'nal_unit_length_field': int_or_none(track.get('NALUnitLengthField', 4)),
+ },
+ })
+ return formats
+
+ def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None, preference=None):
+ def absolute_url(item_url):
+ return urljoin(base_url, item_url)
+
+ def parse_content_type(content_type):
+ if not content_type:
+ return {}
+ ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
+ if ctr:
+ mimetype, codecs = ctr.groups()
+ f = parse_codecs(codecs)
+ f['ext'] = mimetype2ext(mimetype)
+ return f
+ return {}
+
+ def _media_formats(src, cur_media_type, type_info={}):
+ full_url = absolute_url(src)
+ ext = type_info.get('ext') or determine_ext(full_url)
+ if ext == 'm3u8':
+ is_plain_url = False
+ formats = self._extract_m3u8_formats(
+ full_url, video_id, ext='mp4',
+ entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id,
+ preference=preference, fatal=False)
+ elif ext == 'mpd':
+ is_plain_url = False
+ formats = self._extract_mpd_formats(
+ full_url, video_id, mpd_id=mpd_id, fatal=False)
+ else:
+ is_plain_url = True
+ formats = [{
+ 'url': full_url,
+ 'vcodec': 'none' if cur_media_type == 'audio' else None,
+ }]
+ return is_plain_url, formats
+
+ entries = []
+ # amp-video and amp-audio are very similar to their HTML5 counterparts
+ # so we will include them right here (see
+ # https://www.ampproject.org/docs/reference/components/amp-video)
+ media_tags = [(media_tag, media_type, '')
+ for media_tag, media_type
+ in re.findall(r'(?s)(<(?:amp-)?(video|audio)[^>]*/>)', webpage)]
+ media_tags.extend(re.findall(
+ # We only allow video|audio followed by a whitespace or '>'.
+ # Allowing more characters may end up in significant slow down (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11979, example URL:
+ # http://www.porntrex.com/maps/videositemap.xml).
+ r'(?s)(<(?P<tag>(?:amp-)?(?:video|audio))(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
+ for media_tag, media_type, media_content in media_tags:
+ media_info = {
+ 'formats': [],
+ 'subtitles': {},
+ }
+ media_attributes = extract_attributes(media_tag)
+ src = strip_or_none(media_attributes.get('src'))
+ if src:
+ _, formats = _media_formats(src, media_type)
+ media_info['formats'].extend(formats)
+ media_info['thumbnail'] = absolute_url(media_attributes.get('poster'))
+ if media_content:
+ for source_tag in re.findall(r'<source[^>]+>', media_content):
+ s_attr = extract_attributes(source_tag)
+ # data-video-src and data-src are non-standard but seen
+ # several times in the wild
+ src = strip_or_none(dict_get(s_attr, ('src', 'data-video-src', 'data-src')))
+ if not src:
+ continue
+ f = parse_content_type(s_attr.get('type'))
+ is_plain_url, formats = _media_formats(src, media_type, f)
+ if is_plain_url:
+ # width, height, res, label and title attributes are
+ # all non-standard but seen several times in the wild
+ labels = [
+ s_attr.get(lbl)
+ for lbl in ('label', 'title')
+ if str_or_none(s_attr.get(lbl))
+ ]
+ width = int_or_none(s_attr.get('width'))
+ height = (int_or_none(s_attr.get('height'))
+ or int_or_none(s_attr.get('res')))
+ if not width or not height:
+ for lbl in labels:
+ resolution = parse_resolution(lbl)
+ if not resolution:
+ continue
+ width = width or resolution.get('width')
+ height = height or resolution.get('height')
+ for lbl in labels:
+ tbr = parse_bitrate(lbl)
+ if tbr:
+ break
+ else:
+ tbr = None
+ f.update({
+ 'width': width,
+ 'height': height,
+ 'tbr': tbr,
+ 'format_id': s_attr.get('label') or s_attr.get('title'),
+ })
+ f.update(formats[0])
+ media_info['formats'].append(f)
+ else:
+ media_info['formats'].extend(formats)
+ for track_tag in re.findall(r'<track[^>]+>', media_content):
+ track_attributes = extract_attributes(track_tag)
+ kind = track_attributes.get('kind')
+ if not kind or kind in ('subtitles', 'captions'):
+ src = strip_or_none(track_attributes.get('src'))
+ if not src:
+ continue
+ lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
+ media_info['subtitles'].setdefault(lang, []).append({
+ 'url': absolute_url(src),
+ })
+ for f in media_info['formats']:
+ f.setdefault('http_headers', {})['Referer'] = base_url
+ if media_info['formats'] or media_info['subtitles']:
+ entries.append(media_info)
+ return entries
+
+ def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
+ formats = []
+ hdcore_sign = 'hdcore=3.7.0'
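+ # The /i/...master.m3u8 (HLS) and /z/...manifest.f4m (HDS) layouts are
+ # Akamai conventions, so each manifest URL is derived from the other
+ # by swapping the path prefix and the manifest filename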
+ f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ hds_host = hosts.get('hds')
+ if hds_host:
+ f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
+ if 'hdcore=' not in f4m_url:
+ f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
+ f4m_formats = self._extract_f4m_formats(
+ f4m_url, video_id, f4m_id='hds', fatal=False)
+ for entry in f4m_formats:
+ entry.update({'extra_param_to_segment_url': hdcore_sign})
+ formats.extend(f4m_formats)
+ m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+ hls_host = hosts.get('hls')
+ if hls_host:
+ m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ return formats
+
+ def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
+ query = compat_urlparse.urlparse(url).query
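+ # Strip a trailing manifest filename so that url_base points at the
+ # Wowza application/stream path, from which the per-protocol manifest
+ # URLs (playlist.m3u8, manifest.f4m, manifest.mpd, jwplayer.smil)
+ # are derived below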
+ url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
+ mobj = re.search(
+ r'(?:(?:http|rtmp|rtsp)(?P<s>s)?:)?(?P<url>//[^?]+)', url)
+ url_base = mobj.group('url')
+ http_base_url = '%s%s:%s' % ('http', mobj.group('s') or '', url_base)
+ formats = []
+
+ def manifest_url(manifest):
+ m_url = '%s/%s' % (http_base_url, manifest)
+ if query:
+ m_url += '?%s' % query
+ return m_url
+
+ if 'm3u8' not in skip_protocols:
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url('playlist.m3u8'), video_id, 'mp4',
+ m3u8_entry_protocol, m3u8_id='hls', fatal=False))
+ if 'f4m' not in skip_protocols:
+ formats.extend(self._extract_f4m_formats(
+ manifest_url('manifest.f4m'),
+ video_id, f4m_id='hds', fatal=False))
+ if 'dash' not in skip_protocols:
+ formats.extend(self._extract_mpd_formats(
+ manifest_url('manifest.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ if re.search(r'(?:/smil:|\.smil)', url_base):
+ if 'smil' not in skip_protocols:
+ rtmp_formats = self._extract_smil_formats(
+ manifest_url('jwplayer.smil'),
+ video_id, fatal=False)
+ for rtmp_format in rtmp_formats:
+ rtsp_format = rtmp_format.copy()
+ rtsp_format['url'] = '%s/%s' % (rtmp_format['url'], rtmp_format['play_path'])
+ del rtsp_format['play_path']
+ del rtsp_format['ext']
+ rtsp_format.update({
+ 'url': rtsp_format['url'].replace('rtmp://', 'rtsp://'),
+ 'format_id': rtmp_format['format_id'].replace('rtmp', 'rtsp'),
+ 'protocol': 'rtsp',
+ })
+ formats.extend([rtmp_format, rtsp_format])
+ else:
+ for protocol in ('rtmp', 'rtsp'):
+ if protocol not in skip_protocols:
+ formats.append({
+ 'url': '%s:%s' % (protocol, url_base),
+ 'format_id': protocol,
+ 'protocol': protocol,
+ })
+ return formats
+
+ def _find_jwplayer_data(self, webpage, video_id=None, transform_source=js_to_json):
+ mobj = re.search(
+ r'(?s)jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)(?!</script>).*?\.setup\s*\((?P<options>[^)]+)\)',
+ webpage)
+ if mobj:
+ try:
+ jwplayer_data = self._parse_json(mobj.group('options'),
+ video_id=video_id,
+ transform_source=transform_source)
+ except ExtractorError:
+ pass
+ else:
+ if isinstance(jwplayer_data, dict):
+ return jwplayer_data
+
+ def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+ jwplayer_data = self._find_jwplayer_data(
+ webpage, video_id, transform_source=js_to_json)
+ return self._parse_jwplayer_data(
+ jwplayer_data, video_id, *args, **kwargs)
+
+ def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ # JWPlayer backward compatibility: flattened playlists
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
+ if 'playlist' not in jwplayer_data:
+ jwplayer_data = {'playlist': [jwplayer_data]}
+
+ entries = []
+
+ # JWPlayer backward compatibility: single playlist item
+ # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+ if not isinstance(jwplayer_data['playlist'], list):
+ jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
+ for video_data in jwplayer_data['playlist']:
+ # JWPlayer backward compatibility: flattened sources
+ # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+ if 'sources' not in video_data:
+ video_data['sources'] = [video_data]
+
+ this_video_id = video_id or video_data['mediaid']
+
+ formats = self._parse_jwplayer_formats(
+ video_data['sources'], video_id=this_video_id, m3u8_id=m3u8_id,
+ mpd_id=mpd_id, rtmp_params=rtmp_params, base_url=base_url)
+
+ subtitles = {}
+ tracks = video_data.get('tracks')
+ if tracks and isinstance(tracks, list):
+ for track in tracks:
+ if not isinstance(track, dict):
+ continue
+ track_kind = track.get('kind')
+ if not track_kind or not isinstance(track_kind, compat_str):
+ continue
+ if track_kind.lower() not in ('captions', 'subtitles'):
+ continue
+ track_url = urljoin(base_url, track.get('file'))
+ if not track_url:
+ continue
+ subtitles.setdefault(track.get('label') or 'en', []).append({
+ 'url': self._proto_relative_url(track_url)
+ })
+
+ entry = {
+ 'id': this_video_id,
+ 'title': unescapeHTML(video_data['title'] if require_title else video_data.get('title')),
+ 'description': clean_html(video_data.get('description')),
+ 'thumbnail': urljoin(base_url, self._proto_relative_url(video_data.get('image'))),
+ 'timestamp': int_or_none(video_data.get('pubdate')),
+ 'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
+ 'subtitles': subtitles,
+ }
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/utils/validator.js#L32
+ if len(formats) == 1 and re.search(r'^(?:http|//).*(?:youtube\.com|youtu\.be)/.+', formats[0]['url']):
+ entry.update({
+ '_type': 'url_transparent',
+ 'url': formats[0]['url'],
+ })
+ else:
+ self._sort_formats(formats)
+ entry['formats'] = formats
+ entries.append(entry)
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ return self.playlist_result(entries)
+
+ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
+ m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+ urls = []
+ formats = []
+ for source in jwplayer_sources_data:
+ if not isinstance(source, dict):
+ continue
+ source_url = urljoin(
+ base_url, self._proto_relative_url(source.get('file')))
+ if not source_url or source_url in urls:
+ continue
+ urls.append(source_url)
+ source_type = source.get('type') or ''
+ ext = mimetype2ext(source_type) or determine_ext(source_url)
+ if source_type == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=m3u8_id, fatal=False))
+ elif source_type == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ source_url, video_id, mpd_id=mpd_id, fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ source_url, video_id, fatal=False))
+ # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+ elif source_type.startswith('audio') or ext in (
+ 'oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+ formats.append({
+ 'url': source_url,
+ 'vcodec': 'none',
+ 'ext': ext,
+ })
+ else:
+ height = int_or_none(source.get('height'))
+ if height is None:
+ # Often no height is provided but there is a label in a
+ # format like "1080p", "720p SD", or 1080.
+ height = int_or_none(self._search_regex(
+ r'^(\d{3,4})[pP]?(?:\b|$)', compat_str(source.get('label') or ''),
+ 'height', default=None))
+ a_format = {
+ 'url': source_url,
+ 'width': int_or_none(source.get('width')),
+ 'height': height,
+ 'tbr': int_or_none(source.get('bitrate')),
+ 'ext': ext,
+ }
+ if source_url.startswith('rtmp'):
+ a_format['ext'] = 'flv'
+ # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+ # of jwplayer.flash.swf
+ rtmp_url_parts = re.split(
+ r'((?:mp4|mp3|flv):)', source_url, 1)
+ if len(rtmp_url_parts) == 3:
+ rtmp_url, prefix, play_path = rtmp_url_parts
+ a_format.update({
+ 'url': rtmp_url,
+ 'play_path': prefix + play_path,
+ })
+ if rtmp_params:
+ a_format.update(rtmp_params)
+ formats.append(a_format)
+ return formats
+
+ def _live_title(self, name):
+ """ Generate the title for a live video """
+ now = datetime.datetime.now()
+ now_str = now.strftime('%Y-%m-%d %H:%M')
+ return name + ' ' + now_str
+
+ def _int(self, v, name, fatal=False, **kwargs):
+ res = int_or_none(v, **kwargs)
+ if res is None:
+ msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+ if fatal:
+ raise ExtractorError(msg)
+ else:
+ self._downloader.report_warning(msg)
+ return res
+
+ def _float(self, v, name, fatal=False, **kwargs):
+ res = float_or_none(v, **kwargs)
+ if res is None:
+ msg = 'Failed to extract %s: Could not parse value %r' % (name, v)
+ if fatal:
+ raise ExtractorError(msg)
+ else:
+ self._downloader.report_warning(msg)
+ return res
+
+ def _set_cookie(self, domain, name, value, expire_time=None, port=None,
+ path='/', secure=False, discard=False, rest={}, **kwargs):
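+ # compat_cookiejar_Cookie takes the cookielib.Cookie positional
+ # arguments: version, name, value, port, port_specified, domain,
+ # domain_specified, domain_initial_dot, path, path_specified,
+ # secure, expires, discard, comment, comment_url, rest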
+ cookie = compat_cookiejar_Cookie(
+ 0, name, value, port, port is not None, domain, True,
+ domain.startswith('.'), path, True, secure, expire_time,
+ discard, None, None, rest)
+ self._downloader.cookiejar.set_cookie(cookie)
+
+ def _get_cookies(self, url):
+ """ Return a compat_cookies.SimpleCookie with the cookies for the url """
+ req = sanitized_Request(url)
+ self._downloader.cookiejar.add_cookie_header(req)
+ return compat_cookies.SimpleCookie(req.get_header('Cookie'))
+
+ def _apply_first_set_cookie_header(self, url_handle, cookie):
+ """
+ Apply first Set-Cookie header instead of the last. Experimental.
+
+ Some sites (e.g. [1-3]) may serve two cookies under the same name
+ in the Set-Cookie header and expect the first (old) one to be set rather
+ than the second (new). However, per RFC 6265 the newer cookie is the
+ one that should end up in the cookie store, which is what actually happens.
+ We work around this issue by manually resetting the cookie to the
+ first one.
+ 1. https://new.vk.com/
+ 2. https://github.com/ytdl-org/youtube-dl/issues/9841#issuecomment-227871201
+ 3. https://learning.oreilly.com/
+ """
+ for header, cookies in url_handle.headers.items():
+ if header.lower() != 'set-cookie':
+ continue
+ if sys.version_info[0] >= 3:
+ cookies = cookies.encode('iso-8859-1')
+ cookies = cookies.decode('utf-8')
+ cookie_value = re.search(
+ r'%s=(.+?);.*?\b[Dd]omain=(.+?)(?:[,;]|$)' % cookie, cookies)
+ if cookie_value:
+ value, domain = cookie_value.groups()
+ self._set_cookie(domain, cookie, value)
+ break
+
+ def get_testcases(self, include_onlymatching=False):
+ t = getattr(self, '_TEST', None)
+ if t:
+ assert not hasattr(self, '_TESTS'), \
+ '%s has _TEST and _TESTS' % type(self).__name__
+ tests = [t]
+ else:
+ tests = getattr(self, '_TESTS', [])
+ for t in tests:
+ if not include_onlymatching and t.get('only_matching', False):
+ continue
+ t['name'] = type(self).__name__[:-len('IE')]
+ yield t
+
+ def is_suitable(self, age_limit):
+ """ Test whether the extractor is generally suitable for the given
+ age limit (i.e. pornographic sites are not, all others usually are) """
+
+ any_restricted = False
+ for tc in self.get_testcases(include_onlymatching=False):
+ if tc.get('playlist', []):
+ tc = tc['playlist'][0]
+ is_restricted = age_restricted(
+ tc.get('info_dict', {}).get('age_limit'), age_limit)
+ if not is_restricted:
+ return True
+ any_restricted = any_restricted or is_restricted
+ return not any_restricted
+
+ def extract_subtitles(self, *args, **kwargs):
+ if (self._downloader.params.get('writesubtitles', False)
+ or self._downloader.params.get('listsubtitles')):
+ return self._get_subtitles(*args, **kwargs)
+ return {}
+
+ def _get_subtitles(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @staticmethod
+ def _merge_subtitle_items(subtitle_list1, subtitle_list2):
+ """ Merge subtitle items for one language. Items with duplicated URLs
+ will be dropped. """
+ list1_urls = set([item['url'] for item in subtitle_list1])
+ ret = list(subtitle_list1)
+ ret.extend([item for item in subtitle_list2 if item['url'] not in list1_urls])
+ return ret
+
+ @classmethod
+ def _merge_subtitles(cls, subtitle_dict1, subtitle_dict2):
+ """ Merge two subtitle dictionaries, language by language. """
+ ret = dict(subtitle_dict1)
+ for lang in subtitle_dict2:
+ ret[lang] = cls._merge_subtitle_items(subtitle_dict1.get(lang, []), subtitle_dict2[lang])
+ return ret
+
+ def extract_automatic_captions(self, *args, **kwargs):
+ if (self._downloader.params.get('writeautomaticsub', False)
+ or self._downloader.params.get('listsubtitles')):
+ return self._get_automatic_captions(*args, **kwargs)
+ return {}
+
+ def _get_automatic_captions(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def mark_watched(self, *args, **kwargs):
+ if (self._downloader.params.get('mark_watched', False)
+ and (self._get_login_info()[0] is not None
+ or self._downloader.params.get('cookiefile') is not None)):
+ self._mark_watched(*args, **kwargs)
+
+ def _mark_watched(self, *args, **kwargs):
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ def geo_verification_headers(self):
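+ # Route geo-verification requests through the configured proxy via
+ # the internal Ytdl-request-proxy header.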
+ headers = {}
+ geo_verification_proxy = self._downloader.params.get('geo_verification_proxy')
+ if geo_verification_proxy:
+ headers['Ytdl-request-proxy'] = geo_verification_proxy
+ return headers
+
+ def _generic_id(self, url):
+ return compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+
+ def _generic_title(self, url):
+ return compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0])
+
+
+class SearchInfoExtractor(InfoExtractor):
+ """
+ Base class for paged search queries extractors.
+ They accept URLs in the format _SEARCH_KEY(|all|[0-9]):{query}
+ Instances should define _SEARCH_KEY and _MAX_RESULTS.
+ """
+
+ @classmethod
+ def _make_valid_url(cls):
+ return r'%s(?P<prefix>|[1-9][0-9]*|all):(?P<query>[\s\S]+)' % cls._SEARCH_KEY
+
+ @classmethod
+ def suitable(cls, url):
+ return re.match(cls._make_valid_url(), url) is not None
+
+ def _real_extract(self, query):
+ mobj = re.match(self._make_valid_url(), query)
+ if mobj is None:
+ raise ExtractorError('Invalid search query "%s"' % query)
+
+ prefix = mobj.group('prefix')
+ query = mobj.group('query')
+ if prefix == '':
+ return self._get_n_results(query, 1)
+ elif prefix == 'all':
+ return self._get_n_results(query, self._MAX_RESULTS)
+ else:
+ n = int(prefix)
+ if n <= 0:
+ raise ExtractorError('Invalid download number %s for query "%s"' % (n, query))
+ elif n > self._MAX_RESULTS:
+ self._downloader.report_warning('%s returns max %i results (you requested %i)' % (self._SEARCH_KEY, self._MAX_RESULTS, n))
+ n = self._MAX_RESULTS
+ return self._get_n_results(query, n)
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query"""
+ raise NotImplementedError('This method must be implemented by subclasses')
+
+ @property
+ def SEARCH_KEY(self):
+ return self._SEARCH_KEY
diff --git a/youtube_dlc/extractor/commonmistakes.py b/youtube_dlc/extractor/commonmistakes.py
new file mode 100644
index 000000000..933b89eb3
--- /dev/null
+++ b/youtube_dlc/extractor/commonmistakes.py
@@ -0,0 +1,50 @@
+from __future__ import unicode_literals
+
+import sys
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class CommonMistakesIE(InfoExtractor):
+ IE_DESC = False # Do not list
+ _VALID_URL = r'''(?x)
+ (?:url|URL)$
+ '''
+
+ _TESTS = [{
+ 'url': 'url',
+ 'only_matching': True,
+ }, {
+ 'url': 'URL',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ msg = (
+ 'You\'ve asked youtube-dlc to download the URL "%s". '
+ 'That doesn\'t make any sense. '
+ 'Simply remove the parameter in your command or configuration.'
+ ) % url
+ if not self._downloader.params.get('verbose'):
+ msg += ' Add -v to the command line to see what arguments and configuration youtube-dlc got.'
+ raise ExtractorError(msg, expected=True)
+
+
+class UnicodeBOMIE(InfoExtractor):
+ IE_DESC = False
+ _VALID_URL = r'(?P<bom>\ufeff)(?P<id>.*)$'
+
+ # Disable the test for Python 3 versions up to 3.3, where BOM handling
+ # is broken in re
+ # (see https://github.com/ytdl-org/youtube-dl/issues/9751)
+ _TESTS = [] if (3, 0) < sys.version_info <= (3, 3) else [{
+ 'url': '\ufeffhttp://www.youtube.com/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ real_url = self._match_id(url)
+ self.report_warning(
+ 'Your URL starts with a Byte Order Mark (BOM). '
+ 'Removing the BOM and looking for "%s" ...' % real_url)
+ return self.url_result(real_url)
diff --git a/youtube_dl/extractor/commonprotocols.py b/youtube_dlc/extractor/commonprotocols.py
index d98331a4e..d98331a4e 100644
--- a/youtube_dl/extractor/commonprotocols.py
+++ b/youtube_dlc/extractor/commonprotocols.py
diff --git a/youtube_dl/extractor/condenast.py b/youtube_dlc/extractor/condenast.py
index ed278fefc..ed278fefc 100644
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dlc/extractor/condenast.py
diff --git a/youtube_dlc/extractor/contv.py b/youtube_dlc/extractor/contv.py
new file mode 100644
index 000000000..84b462d40
--- /dev/null
+++ b/youtube_dlc/extractor/contv.py
@@ -0,0 +1,118 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
+
+
+class CONtvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?contv\.com/details-movie/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.contv.com/details-movie/CEG10022949/days-of-thrills-&-laughter',
+ 'info_dict': {
+ 'id': 'CEG10022949',
+ 'ext': 'mp4',
+ 'title': 'Days Of Thrills & Laughter',
+ 'description': 'md5:5d6b3d0b1829bb93eb72898c734802eb',
+ 'upload_date': '20180703',
+ 'timestamp': 1530634789.61,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.contv.com/details-movie/CLIP-show_fotld_bts/fight-of-the-living-dead:-behind-the-scenes-bites',
+ 'info_dict': {
+ 'id': 'CLIP-show_fotld_bts',
+ 'title': 'Fight of the Living Dead: Behind the Scenes Bites',
+ },
+ 'playlist_mincount': 7,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ details = self._download_json(
+ 'http://metax.contv.live.junctiontv.net/metax/2.5/details/' + video_id,
+ video_id, query={'device': 'web'})
+
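+ # An "episodic" details payload describes a series; expand it into a
+ # playlist of per-episode details-movie URLs handled by this extractor.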
+ if details.get('type') == 'episodic':
+ seasons = self._download_json(
+ 'http://metax.contv.live.junctiontv.net/metax/2.5/seriesfeed/json/' + video_id,
+ video_id)
+ entries = []
+ for season in seasons:
+ for episode in season.get('episodes', []):
+ episode_id = episode.get('id')
+ if not episode_id:
+ continue
+ entries.append(self.url_result(
+ 'https://www.contv.com/details-movie/' + episode_id,
+ CONtvIE.ie_key(), episode_id))
+ return self.playlist_result(entries, video_id, details.get('title'))
+
+ m_details = details['details']
+ title = details['title']
+
+ formats = []
+
+ media_hls_url = m_details.get('media_hls_url')
+ if media_hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ media_hls_url, video_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+
+ media_mp4_url = m_details.get('media_mp4_url')
+ if media_mp4_url:
+ formats.append({
+ 'format_id': 'http',
+ 'url': media_mp4_url,
+ })
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ captions = m_details.get('captions') or {}
+ for caption_url in captions.values():
+ subtitles.setdefault('en', []).append({
+ 'url': caption_url
+ })
+
+ thumbnails = []
+ for image in m_details.get('images', []):
+ image_url = image.get('url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ description = None
+ for p in ('large_', 'medium_', 'small_', ''):
+ d = m_details.get(p + 'description')
+ if d:
+ description = d
+ break
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'description': description,
+ 'timestamp': float_or_none(details.get('metax_added_on'), 1000),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(m_details.get('duration'), 1000),
+ 'view_count': int_or_none(details.get('num_watched')),
+ 'like_count': int_or_none(details.get('num_fav')),
+ 'categories': details.get('category'),
+ 'tags': details.get('tags'),
+ 'season_number': int_or_none(details.get('season')),
+ 'episode_number': int_or_none(details.get('episode')),
+ 'release_year': int_or_none(details.get('pub_year')),
+ }
diff --git a/youtube_dlc/extractor/corus.py b/youtube_dlc/extractor/corus.py
new file mode 100644
index 000000000..e11aadf14
--- /dev/null
+++ b/youtube_dlc/extractor/corus.py
@@ -0,0 +1,160 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .theplatform import ThePlatformFeedIE
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+)
+
+
+class CorusIE(ThePlatformFeedIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?P<domain>
+ (?:
+ globaltv|
+ etcanada|
+ seriesplus|
+ wnetwork|
+ ytv
+ )\.com|
+ (?:
+ hgtv|
+ foodnetwork|
+ slice|
+ history|
+ showcase|
+ bigbrothercanada|
+ abcspark|
+ disney(?:channel|lachaine)
+ )\.ca
+ )
+ /(?:[^/]+/)*
+ (?:
+ video\.html\?.*?\bv=|
+ videos?/(?:[^/]+/)*(?:[a-z0-9-]+-)?
+ )
+ (?P<id>
+ [\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}|
+ (?:[A-Z]{4})?\d{12,20}
+ )
+ '''
+ _TESTS = [{
+ 'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
+ 'info_dict': {
+ 'id': '870923331648',
+ 'ext': 'mp4',
+ 'title': 'Movie Night Popcorn with Bryan',
+ 'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
+ 'upload_date': '20170206',
+ 'timestamp': 1486392197,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON'],
+ }, {
+ 'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.history.ca/the-world-without-canada/video/full-episodes/natural-resources/video.html?v=955054659646#video',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.showcase.ca/eyewitness/video/eyewitness++106/video.html?v=955070531919&p=1&s=da#video',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.bigbrothercanada.ca/video/1457812035894/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.bigbrothercanada.ca/video/big-brother-canada-704/1457812035894/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.seriesplus.com/emissions/dre-mary-mort-sur-ordonnance/videos/deux-coeurs-battant/SERP0055626330000200/',
+ 'only_matching': True
+ }, {
+ 'url': 'https://www.disneychannel.ca/shows/gabby-duran-the-unsittables/video/crybaby-duran-clip/2f557eec-0588-11ea-ae2b-e2c6776b770e/',
+ 'only_matching': True
+ }]
+ _GEO_BYPASS = False
+ _SITE_MAP = {
+ 'globaltv': 'series',
+ 'etcanada': 'series',
+ 'foodnetwork': 'food',
+ 'bigbrothercanada': 'series',
+ 'disneychannel': 'disneyen',
+ 'disneylachaine': 'disneyfr',
+ }
+
+ def _real_extract(self, url):
+ domain, video_id = re.match(self._VALID_URL, url).groups()
+ site = domain.split('.')[0]
+ path = self._SITE_MAP.get(site, site)
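+ # Sites not mapped to the 'series' template are served under the
+ # 'migration/' playlist path.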
+ if path != 'series':
+ path = 'migration/' + path
+ video = self._download_json(
+ 'https://globalcontent.corusappservices.com/templates/%s/playlist/' % path,
+ video_id, query={'byId': video_id},
+ headers={'Accept': 'application/json'})[0]
+ title = video['title']
+
+ formats = []
+ for source in video.get('sources', []):
+ smil_url = source.get('file')
+ if not smil_url:
+ continue
+ source_type = source.get('type')
+ note = 'Downloading%s smil file' % (' ' + source_type if source_type else '')
+ resp = self._download_webpage(
+ smil_url, video_id, note, fatal=False,
+ headers=self.geo_verification_headers())
+ if not resp:
+ continue
+ error = self._parse_json(resp, video_id, fatal=False)
+ if error:
+ if error.get('exception') == 'GeoLocationBlocked':
+ self.raise_geo_restricted(countries=['CA'])
+ raise ExtractorError(error['description'])
+ smil = self._parse_xml(resp, video_id, fatal=False)
+ if smil is None:
+ continue
+ namespace = self._parse_smil_namespace(smil)
+ formats.extend(self._parse_smil_formats(
+ smil, smil_url, video_id, namespace))
+ if not formats and video.get('drm'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for track in video.get('tracks', []):
+ track_url = track.get('file')
+ if not track_url:
+ continue
+ lang = 'fr' if site in ('disneylachaine', 'seriesplus') else 'en'
+ subtitles.setdefault(lang, []).append({'url': track_url})
+
+ metadata = video.get('metadata') or {}
+ get_number = lambda x: int_or_none(video.get('pl1$' + x) or metadata.get(x + 'Number'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': dict_get(video, ('defaultThumbnailUrl', 'thumbnail', 'image')),
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(video.get('availableDate'), 1000),
+ 'subtitles': subtitles,
+ 'duration': float_or_none(metadata.get('duration')),
+ 'series': dict_get(video, ('show', 'pl1$show')),
+ 'season_number': get_number('season'),
+ 'episode_number': get_number('episode'),
+ }
diff --git a/youtube_dl/extractor/coub.py b/youtube_dlc/extractor/coub.py
index 6ea03e65c..6ea03e65c 100644
--- a/youtube_dl/extractor/coub.py
+++ b/youtube_dlc/extractor/coub.py
diff --git a/youtube_dl/extractor/cracked.py b/youtube_dlc/extractor/cracked.py
index f77a68ece..f77a68ece 100644
--- a/youtube_dl/extractor/cracked.py
+++ b/youtube_dlc/extractor/cracked.py
diff --git a/youtube_dl/extractor/crackle.py b/youtube_dlc/extractor/crackle.py
index 49bf3a4f9..49bf3a4f9 100644
--- a/youtube_dl/extractor/crackle.py
+++ b/youtube_dlc/extractor/crackle.py
diff --git a/youtube_dl/extractor/crooksandliars.py b/youtube_dlc/extractor/crooksandliars.py
index 7fb782db7..7fb782db7 100644
--- a/youtube_dl/extractor/crooksandliars.py
+++ b/youtube_dlc/extractor/crooksandliars.py
diff --git a/youtube_dlc/extractor/crunchyroll.py b/youtube_dlc/extractor/crunchyroll.py
new file mode 100644
index 000000000..bc2d1fa8b
--- /dev/null
+++ b/youtube_dlc/extractor/crunchyroll.py
@@ -0,0 +1,686 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+import zlib
+
+from hashlib import sha1
+from math import pow, sqrt, floor
+from .common import InfoExtractor
+from .vrv import VRVIE
+from ..compat import (
+ compat_b64decode,
+ compat_etree_Element,
+ compat_etree_fromstring,
+ compat_str,
+ compat_urllib_parse_urlencode,
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ bytes_to_intlist,
+ extract_attributes,
+ float_or_none,
+ intlist_to_bytes,
+ int_or_none,
+ lowercase_escape,
+ merge_dicts,
+ remove_end,
+ sanitized_Request,
+ urlencode_postdata,
+ xpath_text,
+)
+from ..aes import (
+ aes_cbc_decrypt,
+)
+
+
+class CrunchyrollBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.crunchyroll.com/login'
+ _LOGIN_FORM = 'login_form'
+ _NETRC_MACHINE = 'crunchyroll'
+
+ def _call_rpc_api(self, method, video_id, note=None, data=None):
+ data = data or {}
+ data['req'] = 'RpcApi' + method
+ data = compat_urllib_parse_urlencode(data).encode('utf-8')
+ return self._download_xml(
+ 'https://www.crunchyroll.com/xml/',
+ video_id, note, fatal=False, data=data, headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ def is_logged(webpage):
+ return 'href="/logout"' in webpage
+
+ # Already logged in
+ if is_logged(login_page):
+ return
+
+ login_form_str = self._search_regex(
+ r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
+ login_page, 'login form', group='form')
+
+ post_url = extract_attributes(login_form_str).get('action')
+ if not post_url:
+ post_url = self._LOGIN_URL
+ elif not post_url.startswith('http'):
+ post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+ login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
+
+ login_form.update({
+ 'login_form[name]': username,
+ 'login_form[password]': password,
+ })
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in', 'Wrong login info',
+ data=urlencode_postdata(login_form),
+ headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+ # Successful login
+ if is_logged(response):
+ return
+
+ error = self._html_search_regex(
+ '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+ def _real_initialize(self):
+ self._login()
+
+ @staticmethod
+ def _add_skip_wall(url):
+ parsed_url = compat_urlparse.urlparse(url)
+ qs = compat_urlparse.parse_qs(parsed_url.query)
+ # Always force skip_wall to bypass the maturity wall, namely the 18+
+ # confirmation message:
+ # > This content may be inappropriate for some people.
+ # > Are you sure you want to continue?
+ # since it is not disabled by default in Crunchyroll account settings.
+ # See https://github.com/ytdl-org/youtube-dl/issues/7202.
+ qs['skip_wall'] = ['1']
+ return compat_urlparse.urlunparse(
+ parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+
+class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
+ IE_NAME = 'crunchyroll'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
+ 'info_dict': {
+ 'id': '645513',
+ 'ext': 'mp4',
+ 'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
+ 'description': 'md5:2d17137920c64f2f49981a7797d275ef',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Yomiuri Telecasting Corporation (YTV)',
+ 'upload_date': '20131013',
+ 'url': 're:(?!.*&amp)',
+ },
+ 'params': {
+ # rtmp
+ 'skip_download': True,
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
+ 'info_dict': {
+ 'id': '589804',
+ 'ext': 'flv',
+ 'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
+ 'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Danny Choo Network',
+ 'upload_date': '20120213',
+ },
+ 'params': {
+ # rtmp
+ 'skip_download': True,
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
+ 'info_dict': {
+ 'id': '702409',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Re:Zero Partners',
+ 'timestamp': 1462098900,
+ 'upload_date': '20160501',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
+ 'info_dict': {
+ 'id': '727589',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Kadokawa Pictures Inc.',
+ 'timestamp': 1484130900,
+ 'upload_date': '20170111',
+ 'series': compat_str,
+ 'season': "KONOSUBA -God's blessing on this wonderful world! 2",
+ 'season_number': 2,
+ 'episode': 'Give Me Deliverance From This Judicial Injustice!',
+ 'episode_number': 1,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
+ 'only_matching': True,
+ }, {
+ # geo-restricted (US), 18+ maturity wall, non-premium available
+ 'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
+ 'only_matching': True,
+ }, {
+ # A description with double quotes
+ 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
+ 'info_dict': {
+ 'id': '535080',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'uploader': 'Marvelous AQL Inc.',
+ 'timestamp': 1255512600,
+ 'upload_date': '20091014',
+ },
+ 'params': {
+ # Just test metadata extraction
+ 'skip_download': True,
+ },
+ }, {
+ # make sure we can extract an uploader name that's not a link
+ 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
+ 'info_dict': {
+ 'id': '606899',
+ 'ext': 'mp4',
+ 'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
+ 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
+ 'uploader': 'Geneon Entertainment',
+ 'upload_date': '20120717',
+ },
+ 'params': {
+ # just test metadata extraction
+ 'skip_download': True,
+ },
+ 'skip': 'Video gone',
+ }, {
+ # A video with a vastly different season name compared to the series name
+ 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
+ 'info_dict': {
+ 'id': '590532',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'uploader': 'TV TOKYO',
+ 'timestamp': 1330956000,
+ 'upload_date': '20120305',
+ 'series': 'Nyarko-san: Another Crawling Chaos',
+ 'season': 'Haiyoru! Nyaruani (ONA)',
+ },
+ 'params': {
+ # Just test metadata extraction
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.crunchyroll.com/media-723735',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
+ 'only_matching': True,
+ }]
+
+ _FORMAT_IDS = {
+ '360': ('60', '106'),
+ '480': ('61', '106'),
+ '720': ('62', '106'),
+ '1080': ('80', '108'),
+ }
+
+ def _download_webpage(self, url_or_request, *args, **kwargs):
+ request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
+ else sanitized_Request(url_or_request))
+ # Accept-Language must be set explicitly to accept any language in
+ # order to avoid issues similar to
+ # https://github.com/ytdl-org/youtube-dl/issues/6797.
+ # Along with the IP address, Crunchyroll uses Accept-Language to decide
+ # whether georestriction should be imposed (from what I can see it just
+ # takes the first language, ignoring the priority, and requires it to
+ # correspond to the IP). This also breaks Crunchyroll in some browsers
+ # that don't place the locale language first in the header. Allowing
+ # any language works around the issue.
+ request.add_header('Accept-Language', '*')
+ return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
+
+ def _decrypt_subtitles(self, data, iv, id):
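+ # Subtitles are zlib-compressed XML encrypted with AES-CBC; the key is
+ # derived from the numeric subtitle id (see obfuscate_key below).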
+ data = bytes_to_intlist(compat_b64decode(data))
+ iv = bytes_to_intlist(compat_b64decode(iv))
+ id = int(id)
+
+ def obfuscate_key_aux(count, modulo, start):
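+ # Fibonacci-like sequence seeded with `start`, reduced modulo `modulo`
+ # and shifted into printable ASCII (+33).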
+ output = list(start)
+ for _ in range(count):
+ output.append(output[-1] + output[-2])
+ # cut off start values
+ output = output[2:]
+ output = list(map(lambda x: x % modulo + 33, output))
+ return output
+
+ def obfuscate_key(key):
+ num1 = int(floor(pow(2, 25) * sqrt(6.9)))
+ num2 = (num1 ^ key) << 5
+ num3 = key ^ num1
+ num4 = num3 ^ (num3 >> 3) ^ num2
+ prefix = intlist_to_bytes(obfuscate_key_aux(20, 97, (1, 2)))
+ shaHash = bytes_to_intlist(sha1(prefix + str(num4).encode('ascii')).digest())
+ # Extend the 160-bit hash to 256 bits
+ return shaHash + [0] * 12
+
+ key = obfuscate_key(id)
+
+ decrypted_data = intlist_to_bytes(aes_cbc_decrypt(data, key, iv))
+ return zlib.decompress(decrypted_data)
+
+ def _convert_subtitles_to_srt(self, sub_root):
+ output = ''
+
+ for i, event in enumerate(sub_root.findall('./events/event'), 1):
+ start = event.attrib['start'].replace('.', ',')
+ end = event.attrib['end'].replace('.', ',')
+ text = event.attrib['text'].replace('\\N', '\n')
+ output += '%d\n%s --> %s\n%s\n\n' % (i, start, end, text)
+ return output
+
+ def _convert_subtitles_to_ass(self, sub_root):
+
+ def ass_bool(strvalue):
+ assvalue = '0'
+ if strvalue == '1':
+ assvalue = '-1'
+ return assvalue
+
+ output = '[Script Info]\n'
+ output += 'Title: %s\n' % sub_root.attrib['title']
+ output += 'ScriptType: v4.00+\n'
+ output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
+ output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
+ output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
+ output += """
+[V4+ Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
+"""
+ for style in sub_root.findall('./styles/style'):
+ output += 'Style: ' + style.attrib['name']
+ output += ',' + style.attrib['font_name']
+ output += ',' + style.attrib['font_size']
+ output += ',' + style.attrib['primary_colour']
+ output += ',' + style.attrib['secondary_colour']
+ output += ',' + style.attrib['outline_colour']
+ output += ',' + style.attrib['back_colour']
+ output += ',' + ass_bool(style.attrib['bold'])
+ output += ',' + ass_bool(style.attrib['italic'])
+ output += ',' + ass_bool(style.attrib['underline'])
+ output += ',' + ass_bool(style.attrib['strikeout'])
+ output += ',' + style.attrib['scale_x']
+ output += ',' + style.attrib['scale_y']
+ output += ',' + style.attrib['spacing']
+ output += ',' + style.attrib['angle']
+ output += ',' + style.attrib['border_style']
+ output += ',' + style.attrib['outline']
+ output += ',' + style.attrib['shadow']
+ output += ',' + style.attrib['alignment']
+ output += ',' + style.attrib['margin_l']
+ output += ',' + style.attrib['margin_r']
+ output += ',' + style.attrib['margin_v']
+ output += ',' + style.attrib['encoding']
+ output += '\n'
+
+ output += """
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+"""
+ for event in sub_root.findall('./events/event'):
+ output += 'Dialogue: 0'
+ output += ',' + event.attrib['start']
+ output += ',' + event.attrib['end']
+ output += ',' + event.attrib['style']
+ output += ',' + event.attrib['name']
+ output += ',' + event.attrib['margin_l']
+ output += ',' + event.attrib['margin_r']
+ output += ',' + event.attrib['margin_v']
+ output += ',' + event.attrib['effect']
+ output += ',' + event.attrib['text']
+ output += '\n'
+
+ return output
+
+ def _extract_subtitles(self, subtitle):
+ sub_root = compat_etree_fromstring(subtitle)
+ return [{
+ 'ext': 'srt',
+ 'data': self._convert_subtitles_to_srt(sub_root),
+ }, {
+ 'ext': 'ass',
+ 'data': self._convert_subtitles_to_ass(sub_root),
+ }]
+
+ def _get_subtitles(self, video_id, webpage):
+ subtitles = {}
+ for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
+ sub_doc = self._call_rpc_api(
+ 'Subtitle_GetXml', video_id,
+ 'Downloading subtitles for ' + sub_name, data={
+ 'subtitle_script_id': sub_id,
+ })
+ if not isinstance(sub_doc, compat_etree_Element):
+ continue
+ sid = sub_doc.get('id')
+ iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
+ data = xpath_text(sub_doc, 'data', 'subtitle data')
+ if not sid or not iv or not data:
+ continue
+ subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
+ lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
+ if not lang_code:
+ continue
+ subtitles[lang_code] = self._extract_subtitles(subtitle)
+ return subtitles
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('video_id')
+
+ if mobj.group('prefix') == 'm':
+ mobile_webpage = self._download_webpage(url, video_id, 'Downloading mobile webpage')
+ webpage_url = self._search_regex(r'<link rel="canonical" href="([^"]+)" />', mobile_webpage, 'webpage_url')
+ else:
+ webpage_url = 'http://www.' + mobj.group('url')
+
+ webpage = self._download_webpage(
+ self._add_skip_wall(webpage_url), video_id,
+ headers=self.geo_verification_headers())
+ note_m = self._html_search_regex(
+ r'<div class="showmedia-trailer-notice">(.+?)</div>',
+ webpage, 'trailer-notice', default='')
+ if note_m:
+ raise ExtractorError(note_m)
+
+ mobj = re.search(r'Page\.messaging_box_controller\.addItems\(\[(?P<msg>{.+?})\]\)', webpage)
+ if mobj:
+ msg = json.loads(mobj.group('msg'))
+ if msg.get('type') == 'error':
+ raise ExtractorError('crunchyroll returned error: %s' % msg['message_body'], expected=True)
+
+ if 'To view this, please log in to verify you are 18 or older.' in webpage:
+ self.raise_login_required()
+
+ media = self._parse_json(self._search_regex(
+ r'vilos\.config\.media\s*=\s*({.+?});',
+ webpage, 'vilos media', default='{}'), video_id)
+ media_metadata = media.get('metadata') or {}
+
+ language = self._search_regex(
+ r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
+ webpage, 'language', default=None, group='lang')
+
+ video_title = self._html_search_regex(
+ (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
+ r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
+ webpage, 'video_title', default=None)
+ if not video_title:
+ video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
+ video_title = re.sub(r' {2,}', ' ', video_title)
+ video_description = (self._parse_json(self._html_search_regex(
+ r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
+ webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
+ if video_description:
+ video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
+ video_uploader = self._html_search_regex(
+ # try looking for both an uploader that's a link and one that's not
+ [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
+ webpage, 'video_uploader', default=False)
+
+ formats = []
+ for stream in media.get('streams', []):
+ audio_lang = stream.get('audio_lang')
+ hardsub_lang = stream.get('hardsub_lang')
+ vrv_formats = self._extract_vrv_formats(
+ stream.get('url'), video_id, stream.get('format'),
+ audio_lang, hardsub_lang)
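+ # Prefer streams without hardsubs; bump language_preference when the
+ # audio and/or hardsub language matches the page locale.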
+ for f in vrv_formats:
+ if not hardsub_lang:
+ f['preference'] = 1
+ language_preference = 0
+ if audio_lang == language:
+ language_preference += 1
+ if hardsub_lang == language:
+ language_preference += 1
+ if language_preference:
+ f['language_preference'] = language_preference
+ formats.extend(vrv_formats)
+ if not formats:
+ available_fmts = []
+ for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
+ attrs = extract_attributes(a)
+ href = attrs.get('href')
+ if href and '/freetrial' in href:
+ continue
+ available_fmts.append(fmt)
+ if not available_fmts:
+ for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
+ available_fmts = re.findall(p, webpage)
+ if available_fmts:
+ break
+ if not available_fmts:
+ available_fmts = self._FORMAT_IDS.keys()
+ video_encode_ids = []
+
+ for fmt in available_fmts:
+ stream_quality, stream_format = self._FORMAT_IDS[fmt]
+ video_format = fmt + 'p'
+ stream_infos = []
+ streamdata = self._call_rpc_api(
+ 'VideoPlayer_GetStandardConfig', video_id,
+ 'Downloading media info for %s' % video_format, data={
+ 'media_id': video_id,
+ 'video_format': stream_format,
+ 'video_quality': stream_quality,
+ 'current_page': url,
+ })
+ if isinstance(streamdata, compat_etree_Element):
+ stream_info = streamdata.find('./{default}preload/stream_info')
+ if stream_info is not None:
+ stream_infos.append(stream_info)
+ stream_info = self._call_rpc_api(
+ 'VideoEncode_GetStreamInfo', video_id,
+ 'Downloading stream info for %s' % video_format, data={
+ 'media_id': video_id,
+ 'video_format': stream_format,
+ 'video_encode_quality': stream_quality,
+ })
+ if isinstance(stream_info, compat_etree_Element):
+ stream_infos.append(stream_info)
+ for stream_info in stream_infos:
+ video_encode_id = xpath_text(stream_info, './video_encode_id')
+ if video_encode_id in video_encode_ids:
+ continue
+ video_encode_ids.append(video_encode_id)
+
+ video_file = xpath_text(stream_info, './file')
+ if not video_file:
+ continue
+ if video_file.startswith('http'):
+ formats.extend(self._extract_m3u8_formats(
+ video_file, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+
+ video_url = xpath_text(stream_info, './host')
+ if not video_url:
+ continue
+ metadata = stream_info.find('./metadata')
+ format_info = {
+ 'format': video_format,
+ 'height': int_or_none(xpath_text(metadata, './height')),
+ 'width': int_or_none(xpath_text(metadata, './width')),
+ }
+
+ if '.fplive.net/' in video_url:
+ video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
+ parsed_video_url = compat_urlparse.urlparse(video_url)
+ direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
+ netloc='v.lvlt.crcdn.net',
+ path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
+ if self._is_valid_url(direct_video_url, video_id, video_format):
+ format_info.update({
+ 'format_id': 'http-' + video_format,
+ 'url': direct_video_url,
+ })
+ formats.append(format_info)
+ continue
+
+ format_info.update({
+ 'format_id': 'rtmp-' + video_format,
+ 'url': video_url,
+ 'play_path': video_file,
+ 'ext': 'flv',
+ })
+ formats.append(format_info)
+ self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
+
+ metadata = self._call_rpc_api(
+ 'VideoPlayer_GetMediaMetadata', video_id,
+ note='Downloading media info', data={
+ 'media_id': video_id,
+ })
+
+ subtitles = {}
+ for subtitle in media.get('subtitles', []):
+ subtitle_url = subtitle.get('url')
+ if not subtitle_url:
+ continue
+ subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
+ 'url': subtitle_url,
+ 'ext': subtitle.get('format', 'ass'),
+ })
+ if not subtitles:
+ subtitles = self.extract_subtitles(video_id, webpage)
+
+ # The webpage provides more accurate data than series_title from the XML
+ series = self._html_search_regex(
+ r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
+ webpage, 'series', fatal=False)
+
+ season = episode = episode_number = duration = thumbnail = None
+
+ if isinstance(metadata, compat_etree_Element):
+ season = xpath_text(metadata, 'series_title')
+ episode = xpath_text(metadata, 'episode_title')
+ episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
+ duration = float_or_none(media_metadata.get('duration'), 1000)
+ thumbnail = xpath_text(metadata, 'episode_image_url')
+
+ if not episode:
+ episode = media_metadata.get('title')
+ if not episode_number:
+ episode_number = int_or_none(media_metadata.get('episode_number'))
+ if not thumbnail:
+ thumbnail = media_metadata.get('thumbnail', {}).get('url')
+
+ season_number = int_or_none(self._search_regex(
+ r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
+ webpage, 'season number', default=None))
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': video_title,
+ 'description': video_description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'uploader': video_uploader,
+ 'series': series,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }, info)
+
+
+class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
+ IE_NAME = 'crunchyroll:playlist'
+ _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
+
+ _TESTS = [{
+ 'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+ 'info_dict': {
+ 'id': 'a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
+ 'title': 'A Bridge to the Starry Skies - Hoshizora e Kakaru Hashi'
+ },
+ 'playlist_count': 13,
+ }, {
+ # geo-restricted (US), 18+ maturity wall, non-premium available
+ 'url': 'http://www.crunchyroll.com/cosplay-complex-ova',
+ 'info_dict': {
+ 'id': 'cosplay-complex-ova',
+ 'title': 'Cosplay Complex OVA'
+ },
+ 'playlist_count': 3,
+ 'skip': 'Georestricted',
+ }, {
+ # geo-restricted (US), 18+ maturity wall, non-premium will be available since 2015.11.14
+ 'url': 'http://www.crunchyroll.com/ladies-versus-butlers?skip_wall=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ show_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ self._add_skip_wall(url), show_id,
+ headers=self.geo_verification_headers())
+ title = self._html_search_meta('name', webpage, default=None)
+
+ episode_paths = re.findall(
+ r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
+ webpage)
+ entries = [
+ self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
+ for ep_id, ep in episode_paths
+ ]
+ entries.reverse()
+
+ return {
+ '_type': 'playlist',
+ 'id': show_id,
+ 'title': title,
+ 'entries': entries,
+ }
diff --git a/youtube_dl/extractor/cspan.py b/youtube_dlc/extractor/cspan.py
index 67d6df4b0..67d6df4b0 100644
--- a/youtube_dl/extractor/cspan.py
+++ b/youtube_dlc/extractor/cspan.py
diff --git a/youtube_dl/extractor/ctsnews.py b/youtube_dlc/extractor/ctsnews.py
index 679f1d92e..679f1d92e 100644
--- a/youtube_dl/extractor/ctsnews.py
+++ b/youtube_dlc/extractor/ctsnews.py
diff --git a/youtube_dl/extractor/ctvnews.py b/youtube_dlc/extractor/ctvnews.py
index 03f8cefb7..03f8cefb7 100644
--- a/youtube_dl/extractor/ctvnews.py
+++ b/youtube_dlc/extractor/ctvnews.py
diff --git a/youtube_dl/extractor/cultureunplugged.py b/youtube_dlc/extractor/cultureunplugged.py
index bcdf27323..bcdf27323 100644
--- a/youtube_dl/extractor/cultureunplugged.py
+++ b/youtube_dlc/extractor/cultureunplugged.py
diff --git a/youtube_dl/extractor/curiositystream.py b/youtube_dlc/extractor/curiositystream.py
index e4a7fca6c..e4a7fca6c 100644
--- a/youtube_dl/extractor/curiositystream.py
+++ b/youtube_dlc/extractor/curiositystream.py
diff --git a/youtube_dl/extractor/cwtv.py b/youtube_dlc/extractor/cwtv.py
index 73382431b..73382431b 100644
--- a/youtube_dl/extractor/cwtv.py
+++ b/youtube_dlc/extractor/cwtv.py
diff --git a/youtube_dl/extractor/dailymail.py b/youtube_dlc/extractor/dailymail.py
index 67b88fd56..67b88fd56 100644
--- a/youtube_dl/extractor/dailymail.py
+++ b/youtube_dlc/extractor/dailymail.py
diff --git a/youtube_dlc/extractor/dailymotion.py b/youtube_dlc/extractor/dailymotion.py
new file mode 100644
index 000000000..b8529050c
--- /dev/null
+++ b/youtube_dlc/extractor/dailymotion.py
@@ -0,0 +1,393 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ age_restricted,
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ OnDemandPagedList,
+ try_get,
+ unescapeHTML,
+ urlencode_postdata,
+)
+
+
+class DailymotionBaseInfoExtractor(InfoExtractor):
+ _FAMILY_FILTER = None
+ _HEADERS = {
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://www.dailymotion.com',
+ }
+ _NETRC_MACHINE = 'dailymotion'
+
+ def _get_dailymotion_cookies(self):
+ return self._get_cookies('https://www.dailymotion.com/')
+
+ @staticmethod
+ def _get_cookie_value(cookies, name):
+ cookie = cookies.get(name)
+ if cookie:
+ return cookie.value
+
+ def _set_dailymotion_cookie(self, name, value):
+ self._set_cookie('www.dailymotion.com', name, value)
+
+ def _real_initialize(self):
+ cookies = self._get_dailymotion_cookies()
+ ff = self._get_cookie_value(cookies, 'ff')
+ self._FAMILY_FILTER = ff == 'on' if ff else age_restricted(18, self._downloader.params.get('age_limit'))
+ self._set_dailymotion_cookie('ff', 'on' if self._FAMILY_FILTER else 'off')
+
+ def _call_api(self, object_type, xid, object_fields, note, filter_extra=None):
+ if not self._HEADERS.get('Authorization'):
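+ # Reuse a token cached in cookies when present; otherwise obtain one
+ # via the password grant (when logged in) or client_credentials.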
+ cookies = self._get_dailymotion_cookies()
+ token = self._get_cookie_value(cookies, 'access_token') or self._get_cookie_value(cookies, 'client_token')
+ if not token:
+ data = {
+ 'client_id': 'f1a362d288c1b98099c7',
+ 'client_secret': 'eea605b96e01c796ff369935357eca920c5da4c5',
+ }
+ username, password = self._get_login_info()
+ if username:
+ data.update({
+ 'grant_type': 'password',
+ 'password': password,
+ 'username': username,
+ })
+ else:
+ data['grant_type'] = 'client_credentials'
+ try:
+ token = self._download_json(
+ 'https://graphql.api.dailymotion.com/oauth/token',
+ None, 'Downloading Access Token',
+ data=urlencode_postdata(data))['access_token']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(), xid)['error_description'], expected=True)
+ raise
+ self._set_dailymotion_cookie('access_token' if username else 'client_token', token)
+ self._HEADERS['Authorization'] = 'Bearer ' + token
+
+ resp = self._download_json(
+ 'https://graphql.api.dailymotion.com/', xid, note, data=json.dumps({
+ 'query': '''{
+ %s(xid: "%s"%s) {
+ %s
+ }
+}''' % (object_type, xid, ', ' + filter_extra if filter_extra else '', object_fields),
+ }).encode(), headers=self._HEADERS)
+ obj = resp['data'][object_type]
+ if not obj:
+ raise ExtractorError(resp['errors'][0]['message'], expected=True)
+ return obj
+
+
+class DailymotionIE(DailymotionBaseInfoExtractor):
+ _VALID_URL = r'''(?ix)
+ https?://
+ (?:
+ (?:(?:www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|\#)/)?video|swf)|
+ (?:www\.)?lequipe\.fr/video
+ )
+ /(?P<id>[^/?_]+)(?:.+?\bplaylist=(?P<playlist_id>x[0-9a-z]+))?
+ '''
+ IE_NAME = 'dailymotion'
+ _TESTS = [{
+ 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news',
+ 'md5': '074b95bdee76b9e3654137aee9c79dfe',
+ 'info_dict': {
+ 'id': 'x5kesuj',
+ 'ext': 'mp4',
+ 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. Miller',
+ 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller',
+ 'duration': 187,
+ 'timestamp': 1493651285,
+ 'upload_date': '20170501',
+ 'uploader': 'Deadline',
+ 'uploader_id': 'x1xm8ri',
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
+ 'md5': '2137c41a8e78554bb09225b8eb322406',
+ 'info_dict': {
+ 'id': 'x2iuewm',
+ 'ext': 'mp4',
+ 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News',
+ 'description': 'Several come bundled with the Steam Controller.',
+ 'thumbnail': r're:^https?:.*\.(?:jpg|png)$',
+ 'duration': 74,
+ 'timestamp': 1425657362,
+ 'upload_date': '20150306',
+ 'uploader': 'IGN',
+ 'uploader_id': 'xijv66',
+ 'age_limit': 0,
+ 'view_count': int,
+ },
+ 'skip': 'video gone',
+ }, {
+ # Vevo video
+ 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi',
+ 'info_dict': {
+ 'title': 'Roar (Official)',
+ 'id': 'USUV71301934',
+ 'ext': 'mp4',
+ 'uploader': 'Katy Perry',
+ 'upload_date': '20130905',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'VEVO is only available in some countries',
+ }, {
+ # age-restricted video
+ 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband',
+ 'md5': '0d667a7b9cebecc3c89ee93099c4159d',
+ 'info_dict': {
+ 'id': 'xyh2zz',
+ 'ext': 'mp4',
+ 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]',
+ 'uploader': 'HotWaves1012',
+ 'age_limit': 18,
+ },
+ 'skip': 'video gone',
+ }, {
+ # geo-restricted, player v5
+ 'url': 'http://www.dailymotion.com/video/xhza0o',
+ 'only_matching': True,
+ }, {
+ # with subtitles
+ 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.dailymotion.com/swf/video/x3n92nf',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lequipe.fr/video/x791mem',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.lequipe.fr/video/k7MtHciueyTcrFtFKA2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dailymotion.com/video/x3z49k?playlist=xv4bw',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+ _COMMON_MEDIA_FIELDS = '''description
+ geoblockedCountries {
+ allowed
+ }
+ xid'''
+
+ @staticmethod
+ def _extract_urls(webpage):
+ urls = []
+ # Look for embedded Dailymotion player
+ # https://developer.dailymotion.com/player#player-parameters
+ for mobj in re.finditer(
+ r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage):
+ urls.append(unescapeHTML(mobj.group('url')))
+ for mobj in re.finditer(
+ r'(?s)DM\.player\([^,]+,\s*{.*?video[\'"]?\s*:\s*["\']?(?P<id>[0-9a-zA-Z]+).+?}\s*\);', webpage):
+ urls.append('https://www.dailymotion.com/embed/video/' + mobj.group('id'))
+ return urls
+
+ def _real_extract(self, url):
+ video_id, playlist_id = re.match(self._VALID_URL, url).groups()
+
+ if playlist_id:
+ if not self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
+ return self.url_result(
+ 'http://www.dailymotion.com/playlist/' + playlist_id,
+ 'DailymotionPlaylist', playlist_id)
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+ password = self._downloader.params.get('videopassword')
+ media = self._call_api(
+ 'media', video_id, '''... on Video {
+ %s
+ stats {
+ likes {
+ total
+ }
+ views {
+ total
+ }
+ }
+ }
+ ... on Live {
+ %s
+ audienceCount
+ isOnAir
+ }''' % (self._COMMON_MEDIA_FIELDS, self._COMMON_MEDIA_FIELDS), 'Downloading media JSON metadata',
+ 'password: "%s"' % self._downloader.params.get('videopassword') if password else None)
+ xid = media['xid']
+
+ metadata = self._download_json(
+ 'https://www.dailymotion.com/player/metadata/video/' + xid,
+ xid, 'Downloading metadata JSON',
+ query={'app': 'com.dailymotion.neon'})
+
+ error = metadata.get('error')
+ if error:
+ title = error.get('title') or error['raw_message']
+ # See https://developer.dailymotion.com/api#access-error
+ if error.get('code') == 'DM007':
+ allowed_countries = try_get(media, lambda x: x['geoblockedCountries']['allowed'], list)
+ self.raise_geo_restricted(msg=title, countries=allowed_countries)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, title), expected=True)
+
+ title = metadata['title']
+ is_live = media.get('isOnAir')
+ formats = []
+ for quality, media_list in metadata['qualities'].items():
+ for m in media_list:
+ media_url = m.get('url')
+ media_type = m.get('type')
+ if not media_url or media_type == 'application/vnd.lumberjack.manifest':
+ continue
+ if media_type == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ f = {
+ 'url': media_url,
+ 'format_id': 'http-' + quality,
+ }
+ m = re.search(r'/H264-(\d+)x(\d+)(?:-(60)/)?', media_url)
+ if m:
+ width, height, fps = map(int_or_none, m.groups())
+ f.update({
+ 'fps': fps,
+ 'height': height,
+ 'width': width,
+ })
+ formats.append(f)
+ for f in formats:
+ f['url'] = f['url'].split('#')[0]
+ if not f.get('fps') and f['format_id'].endswith('@60'):
+ f['fps'] = 60
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitles_data = try_get(metadata, lambda x: x['subtitles']['data'], dict) or {}
+ for subtitle_lang, subtitle in subtitles_data.items():
+ subtitles[subtitle_lang] = [{
+ 'url': subtitle_url,
+ } for subtitle_url in subtitle.get('urls', [])]
+
+ thumbnails = []
+ for height, poster_url in metadata.get('posters', {}).items():
+ thumbnails.append({
+ 'height': int_or_none(height),
+ 'id': height,
+ 'url': poster_url,
+ })
+
+ owner = metadata.get('owner') or {}
+ stats = media.get('stats') or {}
+ get_count = lambda x: int_or_none(try_get(stats, lambda y: y[x + 's']['total']))
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': clean_html(media.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(metadata.get('duration')) or None,
+ 'timestamp': int_or_none(metadata.get('created_time')),
+ 'uploader': owner.get('screenname'),
+ 'uploader_id': owner.get('id') or metadata.get('screenname'),
+ 'age_limit': 18 if metadata.get('explicit') else 0,
+ 'tags': metadata.get('tags'),
+ 'view_count': get_count('view') or int_or_none(media.get('audienceCount')),
+ 'like_count': get_count('like'),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
+
+
+class DailymotionPlaylistBaseIE(DailymotionBaseInfoExtractor):
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, playlist_id, page):
+ page += 1
+ videos = self._call_api(
+ self._OBJECT_TYPE, playlist_id,
+ '''videos(allowExplicit: %s, first: %d, page: %d) {
+ edges {
+ node {
+ xid
+ url
+ }
+ }
+ }''' % ('false' if self._FAMILY_FILTER else 'true', self._PAGE_SIZE, page),
+ 'Downloading page %d' % page)['videos']
+ for edge in videos['edges']:
+ node = edge['node']
+ yield self.url_result(
+ node['url'], DailymotionIE.ie_key(), node['xid'])
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, playlist_id), self._PAGE_SIZE)
+ return self.playlist_result(
+ entries, playlist_id)
+
+
+class DailymotionPlaylistIE(DailymotionPlaylistBaseIE):
+ IE_NAME = 'dailymotion:playlist'
+ _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
+ 'info_dict': {
+ 'id': 'xv4bw',
+ },
+ 'playlist_mincount': 20,
+ }]
+ _OBJECT_TYPE = 'collection'
+
+
+class DailymotionUserIE(DailymotionPlaylistBaseIE):
+ IE_NAME = 'dailymotion:user'
+ _VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'https://www.dailymotion.com/user/nqtv',
+ 'info_dict': {
+ 'id': 'nqtv',
+ },
+ 'playlist_mincount': 152,
+ }, {
+ 'url': 'http://www.dailymotion.com/user/UnderProject',
+ 'info_dict': {
+ 'id': 'UnderProject',
+ },
+ 'playlist_mincount': 1000,
+ 'skip': 'Takes too long',
+ }, {
+ 'url': 'https://www.dailymotion.com/user/nqtv',
+ 'info_dict': {
+ 'id': 'nqtv',
+ },
+ 'playlist_mincount': 148,
+ 'params': {
+ 'age_limit': 0,
+ },
+ }]
+ _OBJECT_TYPE = 'channel'
diff --git a/youtube_dlc/extractor/daum.py b/youtube_dlc/extractor/daum.py
new file mode 100644
index 000000000..137095577
--- /dev/null
+++ b/youtube_dlc/extractor/daum.py
@@ -0,0 +1,266 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+)
+
+
+class DaumBaseIE(InfoExtractor):
+ _KAKAO_EMBED_BASE = 'http://tv.kakao.com/embed/player/cliplink/'
+
+
+class DaumIE(DaumBaseIE):
+ _VALID_URL = r'https?://(?:(?:m\.)?tvpot\.daum\.net/v/|videofarm\.daum\.net/controller/player/VodPlayer\.swf\?vid=)(?P<id>[^?#&]+)'
+ IE_NAME = 'daum.net'
+
+ _TESTS = [{
+ 'url': 'http://tvpot.daum.net/v/vab4dyeDBysyBssyukBUjBz',
+ 'info_dict': {
+ 'id': 'vab4dyeDBysyBssyukBUjBz',
+ 'ext': 'mp4',
+ 'title': '마크 헌트 vs 안토니오 실바',
+ 'description': 'Mark Hunt vs Antonio Silva',
+ 'upload_date': '20131217',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 2117,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader_id': 186139,
+ 'uploader': '콘간지',
+ 'timestamp': 1387310323,
+ },
+ }, {
+ 'url': 'http://m.tvpot.daum.net/v/65139429',
+ 'info_dict': {
+ 'id': '65139429',
+ 'ext': 'mp4',
+ 'title': '1297회, \'아빠 아들로 태어나길 잘 했어\' 민수, 감동의 눈물[아빠 어디가] 20150118',
+ 'description': 'md5:79794514261164ff27e36a21ad229fc5',
+ 'upload_date': '20150118',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 154,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader': 'MBC 예능',
+ 'uploader_id': 132251,
+ 'timestamp': 1421604228,
+ },
+ }, {
+ 'url': 'http://tvpot.daum.net/v/07dXWRka62Y%24',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videofarm.daum.net/controller/player/VodPlayer.swf?vid=vwIpVpCQsT8%24&ref=',
+ 'info_dict': {
+ 'id': 'vwIpVpCQsT8$',
+ 'ext': 'flv',
+ 'title': '01-Korean War ( Trouble on the horizon )',
+ 'description': 'Korean War 01\r\nTrouble on the horizon\r\n전쟁의 먹구름',
+ 'upload_date': '20080223',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 249,
+ 'view_count': int,
+ 'comment_count': int,
+ 'uploader': '까칠한 墮落始祖 황비홍님의',
+ 'uploader_id': 560824,
+ 'timestamp': 1203770745,
+ },
+ }, {
+ # Requires dte_type=WEB (#9972)
+ 'url': 'http://tvpot.daum.net/v/s3794Uf1NZeZ1qMpGpeqeRU',
+ 'md5': 'a8917742069a4dd442516b86e7d66529',
+ 'info_dict': {
+ 'id': 's3794Uf1NZeZ1qMpGpeqeRU',
+ 'ext': 'mp4',
+ 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
+ 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
+ 'upload_date': '20170129',
+ 'uploader': '쇼! 음악중심',
+ 'uploader_id': 2653210,
+ 'timestamp': 1485684628,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = compat_urllib_parse_unquote(self._match_id(url))
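+ # Numeric ids are passed through as-is; other Daum vids map to Kakao
+ # cliplinks with an '@my' suffix.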
+ if not video_id.isdigit():
+ video_id += '@my'
+ return self.url_result(
+ self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
+
+
+class DaumClipIE(DaumBaseIE):
+ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/(?:clip/ClipView.(?:do|tv)|mypot/View.do)\?.*?clipid=(?P<id>\d+)'
+ IE_NAME = 'daum.net:clip'
+ _URL_TEMPLATE = 'http://tvpot.daum.net/clip/ClipView.do?clipid=%s'
+
+ _TESTS = [{
+ 'url': 'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
+ 'info_dict': {
+ 'id': '52554690',
+ 'ext': 'mp4',
+ 'title': 'DOTA 2GETHER 시즌2 6회 - 2부',
+ 'description': 'DOTA 2GETHER 시즌2 6회 - 2부',
+ 'upload_date': '20130831',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)',
+ 'duration': 3868,
+ 'view_count': int,
+ 'uploader': 'GOMeXP',
+ 'uploader_id': 6667,
+ 'timestamp': 1377911092,
+ },
+ }, {
+ 'url': 'http://m.tvpot.daum.net/clip/ClipView.tv?clipid=54999425',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if DaumPlaylistIE.suitable(url) or DaumUserIE.suitable(url) else super(DaumClipIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ self._KAKAO_EMBED_BASE + video_id, 'Kakao', video_id)
+
+
+class DaumListIE(InfoExtractor):
+ def _get_entries(self, list_id, list_id_type):
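+ # Page through GetClipInfo (48 clips per page) until has_more is unset.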
+ name = None
+ entries = []
+ for pagenum in itertools.count(1):
+ list_info = self._download_json(
+ 'http://tvpot.daum.net/mypot/json/GetClipInfo.do?size=48&init=true&order=date&page=%d&%s=%s' % (
+ pagenum, list_id_type, list_id), list_id, 'Downloading list info - %s' % pagenum)
+
+ entries.extend([
+ self.url_result(
+ 'http://tvpot.daum.net/v/%s' % clip['vid'])
+ for clip in list_info['clip_list']
+ ])
+
+ if not name:
+ name = list_info.get('playlist_bean', {}).get('name') or \
+ list_info.get('potInfo', {}).get('name')
+
+ if not list_info.get('has_more'):
+ break
+
+ return name, entries
+
+ def _check_clip(self, url, list_id):
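+ # With --no-playlist and a clipid present, return a single-clip result;
+ # otherwise return None so the caller extracts the full list.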
+ query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ if 'clipid' in query_dict:
+ clip_id = query_dict['clipid'][0]
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % clip_id)
+ return self.url_result(DaumClipIE._URL_TEMPLATE % clip_id, 'DaumClip')
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % list_id)
+
+
+class DaumPlaylistIE(DaumListIE):
+ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View\.do|Top\.tv)\?.*?playlistid=(?P<id>[0-9]+)'
+ IE_NAME = 'daum.net:playlist'
+ _URL_TEMPLATE = 'http://tvpot.daum.net/mypot/View.do?playlistid=%s'
+
+ _TESTS = [{
+ 'note': 'Playlist url with clipid',
+ 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
+ 'info_dict': {
+ 'id': '6213966',
+ 'title': 'Woorissica Official',
+ },
+ 'playlist_mincount': 181
+ }, {
+ 'note': 'Playlist url with clipid - noplaylist',
+ 'url': 'http://tvpot.daum.net/mypot/View.do?playlistid=6213966&clipid=73806844',
+ 'info_dict': {
+ 'id': '73806844',
+ 'ext': 'mp4',
+ 'title': '151017 Airport',
+ 'upload_date': '20160117',
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if DaumUserIE.suitable(url) else super(DaumPlaylistIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ clip_result = self._check_clip(url, list_id)
+ if clip_result:
+ return clip_result
+
+ name, entries = self._get_entries(list_id, 'playlistid')
+
+ return self.playlist_result(entries, list_id, name)
+
+
+class DaumUserIE(DaumListIE):
+ _VALID_URL = r'https?://(?:m\.)?tvpot\.daum\.net/mypot/(?:View|Top)\.(?:do|tv)\?.*?ownerid=(?P<id>[0-9a-zA-Z]+)'
+ IE_NAME = 'daum.net:user'
+
+ _TESTS = [{
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0',
+ 'info_dict': {
+ 'id': 'o2scDLIVbHc0',
+ 'title': '마이 리틀 텔레비전',
+ },
+ 'playlist_mincount': 213
+ }, {
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&clipid=73801156',
+ 'info_dict': {
+ 'id': '73801156',
+ 'ext': 'mp4',
+ 'title': '[미공개] 김구라, 오만석이 부릅니다 \'오케피\' - 마이 리틀 텔레비전 20160116',
+ 'upload_date': '20160117',
+ 'description': 'md5:5e91d2d6747f53575badd24bd62b9f36'
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }, {
+ 'note': 'Playlist url has ownerid and playlistid, playlistid takes precedence',
+ 'url': 'http://tvpot.daum.net/mypot/View.do?ownerid=o2scDLIVbHc0&playlistid=6196631',
+ 'info_dict': {
+ 'id': '6196631',
+ 'title': '마이 리틀 텔레비전 - 20160109',
+ },
+ 'playlist_count': 11
+ }, {
+ 'url': 'http://tvpot.daum.net/mypot/Top.do?ownerid=o2scDLIVbHc0',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.tvpot.daum.net/mypot/Top.tv?ownerid=45x1okb1If50&playlistid=3569733',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ clip_result = self._check_clip(url, list_id)
+ if clip_result:
+ return clip_result
+
+ query_dict = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ if 'playlistid' in query_dict:
+ playlist_id = query_dict['playlistid'][0]
+ return self.url_result(DaumPlaylistIE._URL_TEMPLATE % playlist_id, 'DaumPlaylist')
+
+ name, entries = self._get_entries(list_id, 'ownerid')
+
+ return self.playlist_result(entries, list_id, name)
diff --git a/youtube_dl/extractor/dbtv.py b/youtube_dlc/extractor/dbtv.py
index aaedf2e3d..aaedf2e3d 100644
--- a/youtube_dl/extractor/dbtv.py
+++ b/youtube_dlc/extractor/dbtv.py
diff --git a/youtube_dlc/extractor/dctp.py b/youtube_dlc/extractor/dctp.py
new file mode 100644
index 000000000..e700f8d86
--- /dev/null
+++ b/youtube_dlc/extractor/dctp.py
@@ -0,0 +1,105 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class DctpTvIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # 4x3
+ 'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
+ 'md5': '3ffbd1556c3fe210724d7088fad723e3',
+ 'info_dict': {
+ 'id': '95eaa4f33dad413aa17b4ee613cccc6c',
+ 'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
+ 'ext': 'm4v',
+ 'title': 'Videoinstallation für eine Kaufhausfassade',
+ 'description': 'Kurzfilm',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 71.24,
+ 'timestamp': 1302172322,
+ 'upload_date': '20110407',
+ },
+ }, {
+ # 16x9
+ 'url': 'http://www.dctp.tv/filme/sind-youtuber-die-besseren-lehrer/',
+ 'only_matching': True,
+ }]
+
+ _BASE_URL = 'http://dctp-ivms2-restapi.s3.amazonaws.com'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ version = self._download_json(
+ '%s/version.json' % self._BASE_URL, display_id,
+ 'Downloading version JSON')
+
+ restapi_base = '%s/%s/restapi' % (
+ self._BASE_URL, version['version_name'])
+
+ info = self._download_json(
+ '%s/slugs/%s.json' % (restapi_base, display_id), display_id,
+ 'Downloading video info JSON')
+
+ media = self._download_json(
+ '%s/media/%s.json' % (restapi_base, compat_str(info['object_id'])),
+ display_id, 'Downloading media JSON')
+
+ uuid = media['uuid']
+ title = media['title']
+ is_wide = media.get('is_wide')
+ formats = []
+
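+        # each rendition is published on three hosts: HLS segments, S3 and a plain-HTTP CDN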
+ def add_formats(suffix):
+ templ = 'https://%%s/%s_dctp_%s.m4v' % (uuid, suffix)
+ formats.extend([{
+ 'format_id': 'hls-' + suffix,
+ 'url': templ % 'cdn-segments.dctp.tv' + '/playlist.m3u8',
+ 'protocol': 'm3u8_native',
+ }, {
+ 'format_id': 's3-' + suffix,
+ 'url': templ % 'completed-media.s3.amazonaws.com',
+ }, {
+ 'format_id': 'http-' + suffix,
+ 'url': templ % 'cdn-media.dctp.tv',
+ }])
+
+ add_formats('0500_' + ('16x9' if is_wide else '4x3'))
+ if is_wide:
+ add_formats('720p')
+
+ thumbnails = []
+ images = media.get('images')
+ if isinstance(images, list):
+ for image in images:
+ if not isinstance(image, dict):
+ continue
+ image_url = url_or_none(image.get('url'))
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ return {
+ 'id': uuid,
+ 'display_id': display_id,
+ 'title': title,
+ 'alt_title': media.get('subtitle'),
+ 'description': media.get('description') or media.get('teaser'),
+ 'timestamp': unified_timestamp(media.get('created')),
+ 'duration': float_or_none(media.get('duration_in_ms'), scale=1000),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/deezer.py b/youtube_dlc/extractor/deezer.py
index a38b2683d..a38b2683d 100644
--- a/youtube_dl/extractor/deezer.py
+++ b/youtube_dlc/extractor/deezer.py
diff --git a/youtube_dl/extractor/defense.py b/youtube_dlc/extractor/defense.py
index 9fe144e14..9fe144e14 100644
--- a/youtube_dl/extractor/defense.py
+++ b/youtube_dlc/extractor/defense.py
diff --git a/youtube_dl/extractor/democracynow.py b/youtube_dlc/extractor/democracynow.py
index 5c9c0ecdc..5c9c0ecdc 100644
--- a/youtube_dl/extractor/democracynow.py
+++ b/youtube_dlc/extractor/democracynow.py
diff --git a/youtube_dl/extractor/dfb.py b/youtube_dlc/extractor/dfb.py
index a4d0448c2..a4d0448c2 100644
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dlc/extractor/dfb.py
diff --git a/youtube_dl/extractor/dhm.py b/youtube_dlc/extractor/dhm.py
index aee72a6ed..aee72a6ed 100644
--- a/youtube_dl/extractor/dhm.py
+++ b/youtube_dlc/extractor/dhm.py
diff --git a/youtube_dl/extractor/digg.py b/youtube_dlc/extractor/digg.py
index 913c1750f..913c1750f 100644
--- a/youtube_dl/extractor/digg.py
+++ b/youtube_dlc/extractor/digg.py
diff --git a/youtube_dl/extractor/digiteka.py b/youtube_dlc/extractor/digiteka.py
index 3dfde0d8c..3dfde0d8c 100644
--- a/youtube_dl/extractor/digiteka.py
+++ b/youtube_dlc/extractor/digiteka.py
diff --git a/youtube_dlc/extractor/discovery.py b/youtube_dlc/extractor/discovery.py
new file mode 100644
index 000000000..e0139cc86
--- /dev/null
+++ b/youtube_dlc/extractor/discovery.py
@@ -0,0 +1,118 @@
+from __future__ import unicode_literals
+
+import random
+import re
+import string
+
+from .discoverygo import DiscoveryGoBaseIE
+from ..compat import (
+    compat_HTTPError,
+    compat_urllib_parse_unquote,
+)
+from ..utils import ExtractorError
+
+
+class DiscoveryIE(DiscoveryGoBaseIE):
+ _VALID_URL = r'''(?x)https?://
+ (?P<site>
+ go\.discovery|
+ www\.
+ (?:
+ investigationdiscovery|
+ discoverylife|
+ animalplanet|
+ ahctv|
+ destinationamerica|
+ sciencechannel|
+ tlc
+ )|
+ watch\.
+ (?:
+ hgtv|
+ foodnetwork|
+ travelchannel|
+ diynetwork|
+ cookingchanneltv|
+ motortrend
+ )
+ )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
+ _TESTS = [{
+ 'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
+ 'info_dict': {
+ 'id': '5a2f35ce6b66d17a5026e29e',
+ 'ext': 'mp4',
+ 'title': 'Riding with Matthew Perry',
+ 'description': 'md5:a34333153e79bc4526019a5129e7f878',
+ 'duration': 84,
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ }
+ }, {
+ 'url': 'https://www.investigationdiscovery.com/tv-shows/final-vision/full-episodes/final-vision',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
+ 'only_matching': True,
+ }, {
+ # using `show_slug` is important to get the correct video data
+ 'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
+ 'only_matching': True,
+ }]
+ _GEO_COUNTRIES = ['US']
+ _GEO_BYPASS = False
+ _API_BASE_URL = 'https://api.discovery.com/v1/'
+
+ def _real_extract(self, url):
+ site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
+
+ access_token = None
+ cookies = self._get_cookies(url)
+
+ # prefer Affiliate Auth Token over Anonymous Auth Token
+ auth_storage_cookie = cookies.get('eosAf') or cookies.get('eosAn')
+ if auth_storage_cookie and auth_storage_cookie.value:
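+            # the cookie payload is URL-encoded twice, hence the nested unquote calls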
+ auth_storage = self._parse_json(compat_urllib_parse_unquote(
+ compat_urllib_parse_unquote(auth_storage_cookie.value)),
+ display_id, fatal=False) or {}
+ access_token = auth_storage.get('a') or auth_storage.get('access_token')
+
+ if not access_token:
+ access_token = self._download_json(
+ 'https://%s.com/anonymous' % site, display_id,
+ 'Downloading token JSON metadata', query={
+ 'authRel': 'authorization',
+ 'client_id': '3020a40c2356a645b4b4',
+ 'nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
+ 'redirectUri': 'https://www.discovery.com/',
+ })['access_token']
+
+ headers = self.geo_verification_headers()
+ headers['Authorization'] = 'Bearer ' + access_token
+
+ try:
+ video = self._download_json(
+ self._API_BASE_URL + 'content/videos',
+ display_id, 'Downloading content JSON metadata',
+ headers=headers, query={
+ 'embed': 'show.name',
+ 'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
+ 'slug': display_id,
+ 'show_slug': show_slug,
+ })[0]
+ video_id = video['id']
+ stream = self._download_json(
+ self._API_BASE_URL + 'streaming/video/' + video_id,
+ display_id, 'Downloading streaming JSON metadata', headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403):
+ e_description = self._parse_json(
+ e.cause.read().decode(), display_id)['description']
+ if 'resource not available for country' in e_description:
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ if 'Authorized Networks' in e_description:
+                    raise ExtractorError(
+                        'This video is only available via a cable service provider '
+                        'subscription, which is not currently supported. You may want to use --cookies.', expected=True)
+ raise ExtractorError(e_description)
+ raise
+
+ return self._extract_video_info(video, stream, display_id)
diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dlc/extractor/discoverygo.py
index 9e7b14a7d..9e7b14a7d 100644
--- a/youtube_dl/extractor/discoverygo.py
+++ b/youtube_dlc/extractor/discoverygo.py
diff --git a/youtube_dlc/extractor/discoverynetworks.py b/youtube_dlc/extractor/discoverynetworks.py
new file mode 100644
index 000000000..607a54948
--- /dev/null
+++ b/youtube_dlc/extractor/discoverynetworks.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .dplay import DPlayIE
+
+
+class DiscoveryNetworksDeIE(DPlayIE):
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:tlc|dmax)\.de|dplay\.co\.uk)/(?:programme|show)/(?P<programme>[^/]+)/video/(?P<alternate_id>[^/]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.tlc.de/programme/breaking-amish/video/die-welt-da-drauen/DCB331270001100',
+ 'info_dict': {
+ 'id': '78867',
+ 'ext': 'mp4',
+ 'title': 'Die Welt da draußen',
+ 'description': 'md5:61033c12b73286e409d99a41742ef608',
+ 'timestamp': 1554069600,
+ 'upload_date': '20190331',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dmax.de/programme/dmax-highlights/video/tuning-star-sidney-hoffmann-exklusiv-bei-dmax/191023082312316',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dplay.co.uk/show/ghost-adventures/video/hotel-leger-103620/EHD_280313B',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ domain, programme, alternate_id = re.match(self._VALID_URL, url).groups()
+ country = 'GB' if domain == 'dplay.co.uk' else 'DE'
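+        # the realm is the domain with the dot stripped (e.g. tlcde, dmaxde); dplay.co.uk uses questuk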
+ realm = 'questuk' if country == 'GB' else domain.replace('.', '')
+ return self._get_disco_api_info(
+ url, '%s/%s' % (programme, alternate_id),
+ 'sonic-eu1-prod.disco-api.com', realm, country)
diff --git a/youtube_dl/extractor/discoveryvr.py b/youtube_dlc/extractor/discoveryvr.py
index cb63c2649..cb63c2649 100644
--- a/youtube_dl/extractor/discoveryvr.py
+++ b/youtube_dlc/extractor/discoveryvr.py
diff --git a/youtube_dl/extractor/disney.py b/youtube_dlc/extractor/disney.py
index 0eee82fd6..0eee82fd6 100644
--- a/youtube_dl/extractor/disney.py
+++ b/youtube_dlc/extractor/disney.py
diff --git a/youtube_dl/extractor/dispeak.py b/youtube_dlc/extractor/dispeak.py
index c345e0274..c345e0274 100644
--- a/youtube_dl/extractor/dispeak.py
+++ b/youtube_dlc/extractor/dispeak.py
diff --git a/youtube_dl/extractor/dlive.py b/youtube_dlc/extractor/dlive.py
index d95c67a5b..d95c67a5b 100644
--- a/youtube_dl/extractor/dlive.py
+++ b/youtube_dlc/extractor/dlive.py
diff --git a/youtube_dlc/extractor/doodstream.py b/youtube_dlc/extractor/doodstream.py
new file mode 100644
index 000000000..2c9ea6898
--- /dev/null
+++ b/youtube_dlc/extractor/doodstream.py
@@ -0,0 +1,71 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import random
+import string
+import time
+
+from .common import InfoExtractor
+
+
+class DoodStreamIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch)/[ed]/(?P<id>[a-z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://dood.to/e/5s1wmbdacezb',
+ 'md5': '4568b83b31e13242b3f1ff96c55f0595',
+ 'info_dict': {
+ 'id': '5s1wmbdacezb',
+ 'ext': 'mp4',
+ 'title': 'Kat Wonders - Monthly May 2020',
+ 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com',
+ 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg',
+ }
+ }, {
+ 'url': 'https://dood.to/d/jzrxn12t2s7n',
+ 'md5': '3207e199426eca7c2aa23c2872e6728a',
+ 'info_dict': {
+ 'id': 'jzrxn12t2s7n',
+ 'ext': 'mp4',
+ 'title': 'Stacy Cruz Cute ALLWAYSWELL',
+ 'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com',
+ 'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
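+        # /d/ (download) pages merely wrap an /e/ embed player; switch to the embed page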
+ if '/d/' in url:
+ url = "https://dood.to" + self._html_search_regex(
+ r'<iframe src="(/e/[a-z0-9]+)"', webpage, 'embed')
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta(['og:title', 'twitter:title'],
+ webpage, default=None)
+ thumb = self._html_search_meta(['og:image', 'twitter:image'],
+ webpage, default=None)
+ token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token')
+ description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, default=None)
+ auth_url = 'https://dood.to' + self._html_search_regex(
+ r'(/pass_md5.*?)\'', webpage, 'pass_md5')
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0',
+            'Referer': url,
+ }
+
+ webpage = self._download_webpage(auth_url, video_id, headers=headers)
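+        # pass_md5 returns the base of the media URL; append 10 random chars plus token and expiry (in ms)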
+        pad = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10))
+        final_url = '%s%s?token=%s&expiry=%d' % (
+            webpage, pad, token, int(time.time() * 1000))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': final_url,
+ 'http_headers': headers,
+ 'ext': 'mp4',
+ 'description': description,
+ 'thumbnail': thumb,
+ }
diff --git a/youtube_dl/extractor/dotsub.py b/youtube_dlc/extractor/dotsub.py
index 148605c0b..148605c0b 100644
--- a/youtube_dl/extractor/dotsub.py
+++ b/youtube_dlc/extractor/dotsub.py
diff --git a/youtube_dl/extractor/douyutv.py b/youtube_dlc/extractor/douyutv.py
index 9757f4422..9757f4422 100644
--- a/youtube_dl/extractor/douyutv.py
+++ b/youtube_dlc/extractor/douyutv.py
diff --git a/youtube_dlc/extractor/dplay.py b/youtube_dlc/extractor/dplay.py
new file mode 100644
index 000000000..a7b9db568
--- /dev/null
+++ b/youtube_dlc/extractor/dplay.py
@@ -0,0 +1,247 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ unified_timestamp,
+)
+
+
+class DPlayIE(InfoExtractor):
+ _VALID_URL = r'''(?x)https?://
+ (?P<domain>
+ (?:www\.)?(?P<host>dplay\.(?P<country>dk|fi|jp|se|no))|
+ (?P<subdomain_country>es|it)\.dplay\.com
+ )/[^/]+/(?P<id>[^/]+/[^/?#]+)'''
+
+ _TESTS = [{
+ # non geo restricted, via secure api, unsigned download hls URL
+ 'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
+ 'info_dict': {
+ 'id': '13628',
+ 'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
+ 'ext': 'mp4',
+ 'title': 'Svensken lär sig njuta av livet',
+ 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
+ 'duration': 2649.856,
+ 'timestamp': 1365453720,
+ 'upload_date': '20130408',
+ 'creator': 'Kanal 5',
+ 'series': 'Nugammalt - 77 händelser som format Sverige',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ # geo restricted, via secure api, unsigned download hls URL
+ 'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
+ 'info_dict': {
+ 'id': '104465',
+ 'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
+ 'ext': 'mp4',
+ 'title': 'Ted Bundy: Mind Of A Monster',
+ 'description': 'md5:8b780f6f18de4dae631668b8a9637995',
+ 'duration': 5290.027,
+ 'timestamp': 1570694400,
+ 'upload_date': '20191010',
+ 'creator': 'ID - Investigation Discovery',
+ 'series': 'Ted Bundy: Mind Of A Monster',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ # disco-api
+ 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
+ 'info_dict': {
+ 'id': '40206',
+ 'display_id': 'i-kongens-klr/sesong-1-episode-7',
+ 'ext': 'mp4',
+ 'title': 'Episode 7',
+ 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
+ 'duration': 2611.16,
+ 'timestamp': 1516726800,
+ 'upload_date': '20180123',
+ 'series': 'I kongens klær',
+ 'season_number': 1,
+ 'episode_number': 7,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ 'skip': 'Available for Premium users',
+ }, {
+ 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
+ 'md5': '2b808ffb00fc47b884a172ca5d13053c',
+ 'info_dict': {
+ 'id': '6918',
+ 'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
+ 'ext': 'mp4',
+ 'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
+ 'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'upload_date': '20160524',
+ 'timestamp': 1464076800,
+ 'series': 'Biografie imbarazzanti',
+ 'season_number': 1,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ }, {
+ 'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
+ 'info_dict': {
+ 'id': '21652',
+ 'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
+ 'ext': 'mp4',
+ 'title': 'Episodio 1',
+ 'description': 'md5:b9dcff2071086e003737485210675f69',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'upload_date': '20180709',
+ 'timestamp': 1531173540,
+ 'series': 'La fiebre del oro',
+ 'season_number': 8,
+ 'episode': 'Episode 1',
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dplay.jp/video/gold-rush/24086',
+ 'only_matching': True,
+ }]
+
+ def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
+ geo_countries = [country.upper()]
+ self._initialize_geo_bypass({
+ 'countries': geo_countries,
+ })
+ disco_base = 'https://%s/' % disco_host
+ token = self._download_json(
+ disco_base + 'token', display_id, 'Downloading token',
+ query={
+ 'realm': realm,
+ })['data']['attributes']['token']
+ headers = {
+ 'Referer': url,
+ 'Authorization': 'Bearer ' + token,
+ }
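+        # disco-api is a JSON:API; request sparse fieldsets and side-load the related resources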
+ video = self._download_json(
+ disco_base + 'content/videos/' + display_id, display_id,
+ headers=headers, query={
+ 'fields[channel]': 'name',
+ 'fields[image]': 'height,src,width',
+ 'fields[show]': 'name',
+ 'fields[tag]': 'name',
+ 'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
+ 'include': 'images,primaryChannel,show,tags'
+ })
+ video_id = video['data']['id']
+ info = video['data']['attributes']
+ title = info['name'].strip()
+ formats = []
+ try:
+ streaming = self._download_json(
+ disco_base + 'playback/videoPlaybackInfo/' + video_id,
+ display_id, headers=headers)['data']['attributes']['streaming']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
+ error = info['errors'][0]
+ error_code = error.get('code')
+ if error_code == 'access.denied.geoblocked':
+ self.raise_geo_restricted(countries=geo_countries)
+ elif error_code == 'access.denied.missingpackage':
+ self.raise_login_required()
+                raise ExtractorError(error['detail'], expected=True)
+ raise
+ for format_id, format_dict in streaming.items():
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = format_dict.get('url')
+ if not format_url:
+ continue
+ ext = determine_ext(format_url)
+ if format_id == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, display_id, mpd_id='dash', fatal=False))
+ elif format_id == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+
+ creator = series = None
+ tags = []
+ thumbnails = []
+ included = video.get('included') or []
+ if isinstance(included, list):
+ for e in included:
+ attributes = e.get('attributes')
+ if not attributes:
+ continue
+ e_type = e.get('type')
+ if e_type == 'channel':
+ creator = attributes.get('name')
+ elif e_type == 'image':
+ src = attributes.get('src')
+ if src:
+ thumbnails.append({
+ 'url': src,
+ 'width': int_or_none(attributes.get('width')),
+ 'height': int_or_none(attributes.get('height')),
+ })
+                elif e_type == 'show':
+ series = attributes.get('name')
+ elif e_type == 'tag':
+ name = attributes.get('name')
+ if name:
+ tags.append(name)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': info.get('description'),
+ 'duration': float_or_none(info.get('videoDuration'), 1000),
+ 'timestamp': unified_timestamp(info.get('publishStart')),
+ 'series': series,
+ 'season_number': int_or_none(info.get('seasonNumber')),
+ 'episode_number': int_or_none(info.get('episodeNumber')),
+ 'creator': creator,
+ 'tags': tags,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+        # str.lstrip strips a character set, not a prefix, so remove a leading 'www.' explicitly
+        domain = re.sub(r'^www\.', '', mobj.group('domain'))
+ country = mobj.group('country') or mobj.group('subdomain_country')
+ host = 'disco-api.' + domain if domain.startswith('dplay.') else 'eu2-prod.disco-api.com'
+ return self._get_disco_api_info(
+ url, display_id, host, 'dplay' + country, country)
diff --git a/youtube_dl/extractor/drbonanza.py b/youtube_dlc/extractor/drbonanza.py
index 164e97c36..164e97c36 100644
--- a/youtube_dl/extractor/drbonanza.py
+++ b/youtube_dlc/extractor/drbonanza.py
diff --git a/youtube_dlc/extractor/dropbox.py b/youtube_dlc/extractor/dropbox.py
new file mode 100644
index 000000000..9dc6614c5
--- /dev/null
+++ b/youtube_dlc/extractor/dropbox.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import os.path
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import url_basename
+
+
+class DropboxIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?dropbox[.]com/sh?/(?P<id>[a-zA-Z0-9]{15})/.*'
+ _TESTS = [
+ {
+ 'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dlc%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
+ 'info_dict': {
+ 'id': 'nelirfsxnmcfbfh',
+ 'ext': 'mp4',
+ 'title': 'youtube-dlc test video \'ä"BaW_jenozKc'
+ }
+ }, {
+ 'url': 'https://www.dropbox.com/sh/662glsejgzoj9sr/AAByil3FGH9KFNZ13e08eSa1a/Pregame%20Ceremony%20Program%20PA%2020140518.m4v',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ fn = compat_urllib_parse_unquote(url_basename(url))
+ title = os.path.splitext(fn)[0]
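+        # drop any dl=0 parameter and force dl=1 to get a direct file URL instead of the HTML preview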
+ video_url = re.sub(r'[?&]dl=0', '', url)
+ video_url += ('?' if '?' not in video_url else '&') + 'dl=1'
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ }
diff --git a/youtube_dl/extractor/drtuber.py b/youtube_dlc/extractor/drtuber.py
index 2baea585b..2baea585b 100644
--- a/youtube_dl/extractor/drtuber.py
+++ b/youtube_dlc/extractor/drtuber.py
diff --git a/youtube_dlc/extractor/drtv.py b/youtube_dlc/extractor/drtv.py
new file mode 100644
index 000000000..390e79f8c
--- /dev/null
+++ b/youtube_dlc/extractor/drtv.py
@@ -0,0 +1,352 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import binascii
+import hashlib
+import re
+
+from .common import InfoExtractor
+from ..aes import aes_cbc_decrypt
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ bytes_to_intlist,
+ ExtractorError,
+ int_or_none,
+ intlist_to_bytes,
+ float_or_none,
+ mimetype2ext,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+)
+
+
+class DRTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?dr\.dk/(?:tv/se|nyheder|radio(?:/ondemand)?)/(?:[^/]+/)*|
+ (?:www\.)?(?:dr\.dk|dr-massive\.com)/drtv/(?:se|episode)/
+ )
+ (?P<id>[\da-z_-]+)
+ '''
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['DK']
+ IE_NAME = 'drtv'
+ _TESTS = [{
+ 'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
+ 'md5': '25e659cccc9a2ed956110a299fdf5983',
+ 'info_dict': {
+ 'id': 'klassen-darlig-taber-10',
+ 'ext': 'mp4',
+ 'title': 'Klassen - Dårlig taber (10)',
+ 'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
+ 'timestamp': 1539085800,
+ 'upload_date': '20181009',
+ 'duration': 606.84,
+ 'series': 'Klassen',
+ 'season': 'Klassen I',
+ 'season_number': 1,
+ 'season_id': 'urn:dr:mu:bundle:57d7e8216187a4031cfd6f6b',
+ 'episode': 'Episode 10',
+ 'episode_number': 10,
+ 'release_year': 2016,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ # embed
+ 'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
+ 'info_dict': {
+ 'id': 'urn:dr:mu:programcard:57c926176187a50a9c6e83c6',
+ 'ext': 'mp4',
+ 'title': 'christiania pusher street ryddes drdkrjpo',
+ 'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
+ 'timestamp': 1472800279,
+ 'upload_date': '20160902',
+ 'duration': 131.4,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download f4m manifest'],
+ }, {
+ # with SignLanguage formats
+ 'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
+ 'info_dict': {
+ 'id': 'historien-om-danmark-stenalder',
+ 'ext': 'mp4',
+ 'title': 'Historien om Danmark: Stenalder',
+ 'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
+ 'timestamp': 1546628400,
+ 'upload_date': '20190104',
+ 'duration': 3502.56,
+ 'formats': 'mincount:20',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dr.dk/radio/p4kbh/regionale-nyheder-kh4/p4-nyheder-2019-06-26-17-30-9',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dr.dk/drtv/se/bonderoeven_71769',
+ 'info_dict': {
+ 'id': '00951930010',
+ 'ext': 'mp4',
+ 'title': 'Bonderøven (1:8)',
+ 'description': 'md5:3cf18fc0d3b205745d4505f896af8121',
+ 'timestamp': 1546542000,
+ 'upload_date': '20190103',
+ 'duration': 2576.6,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.dr.dk/drtv/episode/bonderoeven_71769',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://dr-massive.com/drtv/se/bonderoeven_71769',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ if '>Programmet er ikke længere tilgængeligt' in webpage:
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
+ video_id = self._search_regex(
+ (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
+ r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
+ webpage, 'video id', default=None)
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'(urn(?:%3A|:)dr(?:%3A|:)mu(?:%3A|:)programcard(?:%3A|:)[\da-f]+)',
+ webpage, 'urn', default=None)
+ if video_id:
+ video_id = compat_urllib_parse_unquote(video_id)
+
+ _PROGRAMCARD_BASE = 'https://www.dr.dk/mu-online/api/1.4/programcard'
+ query = {'expanded': 'true'}
+
+ if video_id:
+ programcard_url = '%s/%s' % (_PROGRAMCARD_BASE, video_id)
+ else:
+ programcard_url = _PROGRAMCARD_BASE
+ page = self._parse_json(
+ self._search_regex(
+ r'data\s*=\s*({.+?})\s*(?:;|</script)', webpage,
+ 'data'), '1')['cache']['page']
+ page = page[list(page.keys())[0]]
+ item = try_get(
+ page, (lambda x: x['item'], lambda x: x['entries'][0]['item']),
+ dict)
+ video_id = item['customId'].split(':')[-1]
+ query['productionnumber'] = video_id
+
+ data = self._download_json(
+ programcard_url, video_id, 'Downloading video JSON', query=query)
+
+ title = str_or_none(data.get('Title')) or re.sub(
+ r'\s*\|\s*(?:TV\s*\|\s*DR|DRTV)$', '',
+ self._og_search_title(webpage))
+ description = self._og_search_description(
+ webpage, default=None) or data.get('Description')
+
+ timestamp = unified_timestamp(
+ data.get('PrimaryBroadcastStartTime') or data.get('SortDateTime'))
+
+ thumbnail = None
+ duration = None
+
+ restricted_to_denmark = False
+
+ formats = []
+ subtitles = {}
+
+ assets = []
+ primary_asset = data.get('PrimaryAsset')
+ if isinstance(primary_asset, dict):
+ assets.append(primary_asset)
+ secondary_assets = data.get('SecondaryAssets')
+ if isinstance(secondary_assets, list):
+ for secondary_asset in secondary_assets:
+ if isinstance(secondary_asset, dict):
+ assets.append(secondary_asset)
+
+        def hex_to_bytes(hex_str):
+            return binascii.a2b_hex(hex_str.encode('ascii'))
+
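+        # EncryptedUri layout (one hex string): 2 chars are skipped, the next 8 encode the
+        # length n of the hex ciphertext, n chars of AES-CBC ciphertext follow, and the
+        # remainder is the IV; the key is SHA-256 of '<iv hex>:<static secret>', and the
+        # PKCS#7 padding is stripped after decryption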
+ def decrypt_uri(e):
+ n = int(e[2:10], 16)
+ a = e[10 + n:]
+ data = bytes_to_intlist(hex_to_bytes(e[10:10 + n]))
+ key = bytes_to_intlist(hashlib.sha256(
+ ('%s:sRBzYNXBzkKgnjj8pGtkACch' % a).encode('utf-8')).digest())
+ iv = bytes_to_intlist(hex_to_bytes(a))
+ decrypted = aes_cbc_decrypt(data, key, iv)
+ return intlist_to_bytes(
+ decrypted[:-decrypted[-1]]).decode('utf-8').split('?')[0]
+
+ for asset in assets:
+ kind = asset.get('Kind')
+ if kind == 'Image':
+ thumbnail = url_or_none(asset.get('Uri'))
+ elif kind in ('VideoResource', 'AudioResource'):
+ duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
+ restricted_to_denmark = asset.get('RestrictedToDenmark')
+ asset_target = asset.get('Target')
+ for link in asset.get('Links', []):
+ uri = link.get('Uri')
+ if not uri:
+ encrypted_uri = link.get('EncryptedUri')
+ if not encrypted_uri:
+ continue
+ try:
+ uri = decrypt_uri(encrypted_uri)
+ except Exception:
+ self.report_warning(
+ 'Unable to decrypt EncryptedUri', video_id)
+ continue
+ uri = url_or_none(uri)
+ if not uri:
+ continue
+ target = link.get('Target')
+ format_id = target or ''
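+                    # rank accessibility renditions (spoken subtitles, sign language etc.) below the default stream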
+ if asset_target in ('SpokenSubtitles', 'SignLanguage', 'VisuallyInterpreted'):
+ preference = -1
+ format_id += '-%s' % asset_target
+ elif asset_target == 'Default':
+ preference = 1
+ else:
+ preference = None
+ if target == 'HDS':
+ f4m_formats = self._extract_f4m_formats(
+ uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
+ video_id, preference, f4m_id=format_id, fatal=False)
+ if kind == 'AudioResource':
+ for f in f4m_formats:
+ f['vcodec'] = 'none'
+ formats.extend(f4m_formats)
+ elif target == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ uri, video_id, 'mp4', entry_protocol='m3u8_native',
+ preference=preference, m3u8_id=format_id,
+ fatal=False))
+ else:
+ bitrate = link.get('Bitrate')
+ if bitrate:
+ format_id += '-%s' % bitrate
+ formats.append({
+ 'url': uri,
+ 'format_id': format_id,
+ 'tbr': int_or_none(bitrate),
+ 'ext': link.get('FileFormat'),
+ 'vcodec': 'none' if kind == 'AudioResource' else None,
+ 'preference': preference,
+ })
+ subtitles_list = asset.get('SubtitlesList') or asset.get('Subtitleslist')
+ if isinstance(subtitles_list, list):
+ LANGS = {
+ 'Danish': 'da',
+ }
+ for subs in subtitles_list:
+ if not isinstance(subs, dict):
+ continue
+ sub_uri = url_or_none(subs.get('Uri'))
+ if not sub_uri:
+ continue
+ lang = subs.get('Language') or 'da'
+ subtitles.setdefault(LANGS.get(lang, lang), []).append({
+ 'url': sub_uri,
+ 'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
+ })
+
+ if not formats and restricted_to_denmark:
+ self.raise_geo_restricted(
+ 'Unfortunately, DR is not allowed to show this program outside Denmark.',
+ countries=self._GEO_COUNTRIES)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'series': str_or_none(data.get('SeriesTitle')),
+ 'season': str_or_none(data.get('SeasonTitle')),
+ 'season_number': int_or_none(data.get('SeasonNumber')),
+ 'season_id': str_or_none(data.get('SeasonUrn')),
+ 'episode': str_or_none(data.get('EpisodeTitle')),
+ 'episode_number': int_or_none(data.get('EpisodeNumber')),
+ 'release_year': int_or_none(data.get('ProductionYear')),
+ }
+
+
+class DRTVLiveIE(InfoExtractor):
+ IE_NAME = 'drtv:live'
+ _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
+ _GEO_COUNTRIES = ['DK']
+ _TEST = {
+ 'url': 'https://www.dr.dk/tv/live/dr1',
+ 'info_dict': {
+ 'id': 'dr1',
+ 'ext': 'mp4',
+ 'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ channel_data = self._download_json(
+ 'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
+ channel_id)
+ title = self._live_title(channel_data['Title'])
+
+ formats = []
+ for streaming_server in channel_data.get('StreamingServers', []):
+ server = streaming_server.get('Server')
+ if not server:
+ continue
+ link_type = streaming_server.get('LinkType')
+ for quality in streaming_server.get('Qualities', []):
+ for stream in quality.get('Streams', []):
+ stream_path = stream.get('Stream')
+ if not stream_path:
+ continue
+ stream_url = update_url_query(
+ '%s/%s' % (server, stream_path), {'b': ''})
+ if link_type == 'HLS':
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, channel_id, 'mp4',
+ m3u8_id=link_type, fatal=False, live=True))
+ elif link_type == 'HDS':
+ formats.extend(self._extract_f4m_formats(update_url_query(
+ '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}),
+ channel_id, f4m_id=link_type, fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': channel_id,
+ 'title': title,
+ 'thumbnail': channel_data.get('PrimaryImageUri'),
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/youtube_dl/extractor/dtube.py b/youtube_dlc/extractor/dtube.py
index 114d2dbe3..114d2dbe3 100644
--- a/youtube_dl/extractor/dtube.py
+++ b/youtube_dlc/extractor/dtube.py
diff --git a/youtube_dlc/extractor/dumpert.py b/youtube_dlc/extractor/dumpert.py
new file mode 100644
index 000000000..d9d9afdec
--- /dev/null
+++ b/youtube_dlc/extractor/dumpert.py
@@ -0,0 +1,80 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+)
+
+
+class DumpertIE(InfoExtractor):
+ _VALID_URL = r'(?P<protocol>https?)://(?:(?:www|legacy)\.)?dumpert\.nl/(?:mediabase|embed|item)/(?P<id>[0-9]+[/_][0-9a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://www.dumpert.nl/item/6646981_951bc60f',
+ 'md5': '1b9318d7d5054e7dcb9dc7654f21d643',
+ 'info_dict': {
+ 'id': '6646981/951bc60f',
+ 'ext': 'mp4',
+ 'title': 'Ik heb nieuws voor je',
+ 'description': 'Niet schrikken hoor',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.dumpert.nl/embed/6675421_dc440fe7',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://legacy.dumpert.nl/mediabase/6646981/951bc60f',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://legacy.dumpert.nl/embed/6675421/dc440fe7',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).replace('_', '/')
+ item = self._download_json(
+ 'http://api-live.dumpert.nl/mobile_api/json/info/' + video_id.replace('/', '_'),
+ video_id)['items'][0]
+ title = item['title']
+ media = next(m for m in item['media'] if m.get('mediatype') == 'VIDEO')
+
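+        # ascending preference order for qualities(): flv lowest, 720p highest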
+ quality = qualities(['flv', 'mobile', 'tablet', '720p'])
+ formats = []
+ for variant in media.get('variants', []):
+ uri = variant.get('uri')
+ if not uri:
+ continue
+ version = variant.get('version')
+ formats.append({
+ 'url': uri,
+ 'format_id': version,
+ 'quality': quality(version),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ stills = item.get('stills') or {}
+ for t in ('thumb', 'still'):
+ for s in ('', '-medium', '-large'):
+ still_id = t + s
+ still_url = stills.get(still_id)
+ if not still_url:
+ continue
+ thumbnails.append({
+ 'id': still_id,
+ 'url': still_url,
+ })
+
+ stats = item.get('stats') or {}
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': item.get('description'),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'duration': int_or_none(media.get('duration')),
+ 'like_count': int_or_none(stats.get('kudos_total')),
+ 'view_count': int_or_none(stats.get('views_total')),
+ }
diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dlc/extractor/dvtv.py
index de7f6d670..de7f6d670 100644
--- a/youtube_dl/extractor/dvtv.py
+++ b/youtube_dlc/extractor/dvtv.py
diff --git a/youtube_dl/extractor/dw.py b/youtube_dlc/extractor/dw.py
index d740652f1..d740652f1 100644
--- a/youtube_dl/extractor/dw.py
+++ b/youtube_dlc/extractor/dw.py
diff --git a/youtube_dl/extractor/eagleplatform.py b/youtube_dlc/extractor/eagleplatform.py
index 36fef07b7..36fef07b7 100644
--- a/youtube_dl/extractor/eagleplatform.py
+++ b/youtube_dlc/extractor/eagleplatform.py
diff --git a/youtube_dl/extractor/ebaumsworld.py b/youtube_dlc/extractor/ebaumsworld.py
index c97682cd3..c97682cd3 100644
--- a/youtube_dl/extractor/ebaumsworld.py
+++ b/youtube_dlc/extractor/ebaumsworld.py
diff --git a/youtube_dl/extractor/echomsk.py b/youtube_dlc/extractor/echomsk.py
index 6b7cc652f..6b7cc652f 100644
--- a/youtube_dl/extractor/echomsk.py
+++ b/youtube_dlc/extractor/echomsk.py
diff --git a/youtube_dl/extractor/egghead.py b/youtube_dlc/extractor/egghead.py
index df11dc206..df11dc206 100644
--- a/youtube_dl/extractor/egghead.py
+++ b/youtube_dlc/extractor/egghead.py
diff --git a/youtube_dl/extractor/ehow.py b/youtube_dlc/extractor/ehow.py
index b1cd4f5d4..b1cd4f5d4 100644
--- a/youtube_dl/extractor/ehow.py
+++ b/youtube_dlc/extractor/ehow.py
diff --git a/youtube_dlc/extractor/eighttracks.py b/youtube_dlc/extractor/eighttracks.py
new file mode 100644
index 000000000..5ededd31d
--- /dev/null
+++ b/youtube_dlc/extractor/eighttracks.py
@@ -0,0 +1,164 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import random
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+)
+
+
+class EightTracksIE(InfoExtractor):
+ IE_NAME = '8tracks'
+ _VALID_URL = r'https?://8tracks\.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
+ _TEST = {
+ 'name': 'EightTracks',
+ 'url': 'http://8tracks.com/ytdl/youtube-dlc-test-tracks-a',
+ 'info_dict': {
+ 'id': '1336550',
+ 'display_id': 'youtube-dlc-test-tracks-a',
+ 'description': "test chars: \"'/\\ä↭",
+ 'title': "youtube-dlc test tracks \"'/\\ä↭<>",
+ },
+ 'playlist': [
+ {
+ 'md5': '96ce57f24389fc8734ce47f4c1abcc55',
+ 'info_dict': {
+ 'id': '11885610',
+ 'ext': 'm4a',
+ 'title': "youtue-dl project<>\"' - youtube-dlc test track 1 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '4ab26f05c1f7291ea460a3920be8021f',
+ 'info_dict': {
+ 'id': '11885608',
+ 'ext': 'm4a',
+ 'title': "youtube-dlc project - youtube-dlc test track 2 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': 'd30b5b5f74217410f4689605c35d1fd7',
+ 'info_dict': {
+ 'id': '11885679',
+ 'ext': 'm4a',
+ 'title': "youtube-dlc project as well - youtube-dlc test track 3 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '4eb0a669317cd725f6bbd336a29f923a',
+ 'info_dict': {
+ 'id': '11885680',
+ 'ext': 'm4a',
+ 'title': "youtube-dlc project as well - youtube-dlc test track 4 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '1893e872e263a2705558d1d319ad19e8',
+ 'info_dict': {
+ 'id': '11885682',
+ 'ext': 'm4a',
+ 'title': "PH - youtube-dlc test track 5 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': 'b673c46f47a216ab1741ae8836af5899',
+ 'info_dict': {
+ 'id': '11885683',
+ 'ext': 'm4a',
+ 'title': "PH - youtube-dlc test track 6 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': '1d74534e95df54986da7f5abf7d842b7',
+ 'info_dict': {
+ 'id': '11885684',
+ 'ext': 'm4a',
+ 'title': "phihag - youtube-dlc test track 7 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ },
+ {
+ 'md5': 'f081f47af8f6ae782ed131d38b9cd1c0',
+ 'info_dict': {
+ 'id': '11885685',
+ 'ext': 'm4a',
+ 'title': "phihag - youtube-dlc test track 8 \"'/\\\u00e4\u21ad",
+ 'uploader_id': 'ytdl'
+ }
+ }
+ ]
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ r"(?s)PAGE\.mix\s*=\s*({.+?});\n", webpage, 'trax information'),
+ playlist_id)
+
+ session = str(random.randint(0, 1000000000))
+ mix_id = data['id']
+ track_count = data['tracks_count']
+ duration = data['duration']
+ avg_song_duration = float(duration) / track_count
+ # duration is sometimes negative, use predefined avg duration
+ if avg_song_duration <= 0:
+ avg_song_duration = 300
+ first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
+ next_url = first_url
+ entries = []
+
+ for i in range(track_count):
+ api_json = None
+ download_tries = 0
+
+ while api_json is None:
+ try:
+ api_json = self._download_webpage(
+ next_url, playlist_id,
+ note='Downloading song information %d/%d' % (i + 1, track_count),
+ errnote='Failed to download song information')
+ except ExtractorError:
+ if download_tries > 3:
+ raise
+ else:
+ download_tries += 1
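+                        # back off for roughly one track length; the API appears to throttle rapid requests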
+ self._sleep(avg_song_duration, playlist_id)
+
+ api_data = json.loads(api_json)
+ track_data = api_data['set']['track']
+ info = {
+ 'id': compat_str(track_data['id']),
+ 'url': track_data['track_file_stream_url'],
+ 'title': track_data['performer'] + ' - ' + track_data['name'],
+ 'raw_title': track_data['name'],
+ 'uploader_id': data['user']['login'],
+ 'ext': 'm4a',
+ }
+ entries.append(info)
+
+ next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (
+ session, mix_id, track_data['id'])
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': compat_str(mix_id),
+ 'display_id': playlist_id,
+ 'title': data.get('name'),
+ 'description': data.get('description'),
+ }
diff --git a/youtube_dl/extractor/einthusan.py b/youtube_dlc/extractor/einthusan.py
index 4e0f8bc81..4e0f8bc81 100644
--- a/youtube_dl/extractor/einthusan.py
+++ b/youtube_dlc/extractor/einthusan.py
diff --git a/youtube_dl/extractor/eitb.py b/youtube_dlc/extractor/eitb.py
index ee5ead18b..ee5ead18b 100644
--- a/youtube_dl/extractor/eitb.py
+++ b/youtube_dlc/extractor/eitb.py
diff --git a/youtube_dl/extractor/ellentube.py b/youtube_dlc/extractor/ellentube.py
index 544473274..544473274 100644
--- a/youtube_dl/extractor/ellentube.py
+++ b/youtube_dlc/extractor/ellentube.py
diff --git a/youtube_dl/extractor/elpais.py b/youtube_dlc/extractor/elpais.py
index b89f6db62..b89f6db62 100644
--- a/youtube_dl/extractor/elpais.py
+++ b/youtube_dlc/extractor/elpais.py
diff --git a/youtube_dl/extractor/embedly.py b/youtube_dlc/extractor/embedly.py
index a5820b21e..a5820b21e 100644
--- a/youtube_dl/extractor/embedly.py
+++ b/youtube_dlc/extractor/embedly.py
diff --git a/youtube_dl/extractor/engadget.py b/youtube_dlc/extractor/engadget.py
index 65635c18b..65635c18b 100644
--- a/youtube_dl/extractor/engadget.py
+++ b/youtube_dlc/extractor/engadget.py
diff --git a/youtube_dlc/extractor/eporner.py b/youtube_dlc/extractor/eporner.py
new file mode 100644
index 000000000..fe42821c7
--- /dev/null
+++ b/youtube_dlc/extractor/eporner.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ encode_base_n,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ parse_duration,
+ str_to_int,
+ url_or_none,
+)
+
+
+class EpornerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
+ _TESTS = [{
+ 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
+ 'md5': '39d486f046212d8e1b911c52ab4691f8',
+ 'info_dict': {
+ 'id': 'qlDUmNsj6VS',
+ 'display_id': 'Infamous-Tiffany-Teen-Strip-Tease-Video',
+ 'ext': 'mp4',
+ 'title': 'Infamous Tiffany Teen Strip Tease Video',
+ 'description': 'md5:764f39abf932daafa37485eb46efa152',
+ 'timestamp': 1232520922,
+ 'upload_date': '20090121',
+ 'duration': 1838,
+ 'view_count': int,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'proxy': '127.0.0.1:8118'
+ }
+ }, {
+ # New (May 2016) URL layout
+ 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0/Star-Wars-XXX-Parody/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage, urlh = self._download_webpage_handle(url, display_id)
+
+ video_id = self._match_id(urlh.geturl())
+
+        video_hash = self._search_regex(
+            r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
+
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ r'<title>(.+?) - EPORNER', webpage, 'title')
+
+ # Reverse engineered from vjs.js
+ def calc_hash(s):
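+            # re-encode each 8-hex-char chunk of the 32-char hash in base 36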
+ return ''.join((encode_base_n(int(s[lb:lb + 8], 16), 36) for lb in range(0, 32, 8)))
+
+ video = self._download_json(
+ 'http://www.eporner.com/xhr/video/%s' % video_id,
+ display_id, note='Downloading video JSON',
+ query={
+                'hash': calc_hash(video_hash),
+ 'device': 'generic',
+ 'domain': 'www.eporner.com',
+ 'fallback': 'false',
+ })
+
+ if video.get('available') is False:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, video['message']), expected=True)
+
+ sources = video['sources']
+
+ formats = []
+ for kind, formats_dict in sources.items():
+ if not isinstance(formats_dict, dict):
+ continue
+ for format_id, format_dict in formats_dict.items():
+ if not isinstance(format_dict, dict):
+ continue
+ src = url_or_none(format_dict.get('src'))
+ if not src or not src.startswith('http'):
+ continue
+ if kind == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ src, display_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=kind, fatal=False))
+ else:
+ height = int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id, 'height', default=None))
+ fps = int_or_none(self._search_regex(
+ r'(\d+)fps', format_id, 'fps', default=None))
+
+ formats.append({
+ 'url': src,
+ 'format_id': format_id,
+ 'height': height,
+ 'fps': fps,
+ })
+ self._sort_formats(formats)
+
+ json_ld = self._search_json_ld(webpage, display_id, default={})
+
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, default=None))
+ view_count = str_to_int(self._search_regex(
+ r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
+ webpage, 'view count', fatal=False))
+
+ return merge_dicts(json_ld, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ })
diff --git a/youtube_dl/extractor/eroprofile.py b/youtube_dlc/extractor/eroprofile.py
index c08643a17..c08643a17 100644
--- a/youtube_dl/extractor/eroprofile.py
+++ b/youtube_dlc/extractor/eroprofile.py
diff --git a/youtube_dl/extractor/escapist.py b/youtube_dlc/extractor/escapist.py
index 4cd815ebc..4cd815ebc 100644
--- a/youtube_dl/extractor/escapist.py
+++ b/youtube_dlc/extractor/escapist.py
diff --git a/youtube_dl/extractor/espn.py b/youtube_dlc/extractor/espn.py
index 6cf05e6da..6cf05e6da 100644
--- a/youtube_dl/extractor/espn.py
+++ b/youtube_dlc/extractor/espn.py
diff --git a/youtube_dl/extractor/esri.py b/youtube_dlc/extractor/esri.py
index e9dcaeb1d..e9dcaeb1d 100644
--- a/youtube_dl/extractor/esri.py
+++ b/youtube_dlc/extractor/esri.py
diff --git a/youtube_dl/extractor/europa.py b/youtube_dlc/extractor/europa.py
index 1efc0b2ec..1efc0b2ec 100644
--- a/youtube_dl/extractor/europa.py
+++ b/youtube_dlc/extractor/europa.py
diff --git a/youtube_dl/extractor/everyonesmixtape.py b/youtube_dlc/extractor/everyonesmixtape.py
index 84a9b750e..84a9b750e 100644
--- a/youtube_dl/extractor/everyonesmixtape.py
+++ b/youtube_dlc/extractor/everyonesmixtape.py
diff --git a/youtube_dl/extractor/expotv.py b/youtube_dlc/extractor/expotv.py
index 95a897782..95a897782 100644
--- a/youtube_dl/extractor/expotv.py
+++ b/youtube_dlc/extractor/expotv.py
diff --git a/youtube_dl/extractor/expressen.py b/youtube_dlc/extractor/expressen.py
index f79365038..f79365038 100644
--- a/youtube_dl/extractor/expressen.py
+++ b/youtube_dlc/extractor/expressen.py
diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py
new file mode 100644
index 000000000..4a3256fd9
--- /dev/null
+++ b/youtube_dlc/extractor/extractors.py
@@ -0,0 +1,1528 @@
+# flake8: noqa
+from __future__ import unicode_literals
+
+from .abc import (
+ ABCIE,
+ ABCIViewIE,
+)
+from .abcnews import (
+ AbcNewsIE,
+ AbcNewsVideoIE,
+)
+from .abcotvs import (
+ ABCOTVSIE,
+ ABCOTVSClipsIE,
+)
+from .academicearth import AcademicEarthCourseIE
+from .acast import (
+ ACastIE,
+ ACastChannelIE,
+)
+from .adn import ADNIE
+from .adobeconnect import AdobeConnectIE
+from .adobetv import (
+ AdobeTVEmbedIE,
+ AdobeTVIE,
+ AdobeTVShowIE,
+ AdobeTVChannelIE,
+ AdobeTVVideoIE,
+)
+from .adultswim import AdultSwimIE
+from .aenetworks import (
+ AENetworksIE,
+ HistoryTopicIE,
+)
+from .afreecatv import AfreecaTVIE
+from .airmozilla import AirMozillaIE
+from .aljazeera import AlJazeeraIE
+from .alphaporno import AlphaPornoIE
+from .amcnetworks import AMCNetworksIE
+from .americastestkitchen import AmericasTestKitchenIE
+from .animeondemand import AnimeOnDemandIE
+from .anvato import AnvatoIE
+from .aol import AolIE
+from .allocine import AllocineIE
+from .aliexpress import AliExpressLiveIE
+from .apa import APAIE
+from .aparat import AparatIE
+from .appleconnect import AppleConnectIE
+from .appletrailers import (
+ AppleTrailersIE,
+ AppleTrailersSectionIE,
+)
+from .archiveorg import ArchiveOrgIE
+from .arkena import ArkenaIE
+from .ard import (
+ ARDBetaMediathekIE,
+ ARDIE,
+ ARDMediathekIE,
+)
+from .arte import (
+ ArteTVPlus7IE,
+ ArteTVEmbedIE,
+ ArteTVPlaylistIE,
+)
+from .asiancrush import (
+ AsianCrushIE,
+ AsianCrushPlaylistIE,
+)
+from .atresplayer import AtresPlayerIE
+from .atttechchannel import ATTTechChannelIE
+from .atvat import ATVAtIE
+from .audimedia import AudiMediaIE
+from .audioboom import AudioBoomIE
+from .audiomack import AudiomackIE, AudiomackAlbumIE
+from .awaan import (
+ AWAANIE,
+ AWAANVideoIE,
+ AWAANLiveIE,
+ AWAANSeasonIE,
+)
+from .azmedien import AZMedienIE
+from .baidu import BaiduVideoIE
+from .bandcamp import BandcampIE, BandcampAlbumIE, BandcampWeeklyIE
+from .bbc import (
+ BBCCoUkIE,
+ BBCCoUkArticleIE,
+ BBCCoUkIPlayerPlaylistIE,
+ BBCCoUkPlaylistIE,
+ BBCIE,
+)
+from .beampro import (
+ BeamProLiveIE,
+ BeamProVodIE,
+)
+from .beeg import BeegIE
+from .behindkink import BehindKinkIE
+from .bellmedia import BellMediaIE
+from .beatport import BeatportIE
+from .bet import BetIE
+from .bfi import BFIPlayerIE
+from .bigflix import BigflixIE
+from .bild import BildIE
+from .bilibili import (
+ BiliBiliIE,
+ BiliBiliBangumiIE,
+ BilibiliAudioIE,
+ BilibiliAudioAlbumIE,
+ BiliBiliPlayerIE,
+)
+from .biobiochiletv import BioBioChileTVIE
+from .bitchute import (
+ BitChuteIE,
+ BitChuteChannelIE,
+)
+from .biqle import BIQLEIE
+from .bleacherreport import (
+ BleacherReportIE,
+ BleacherReportCMSIE,
+)
+from .blinkx import BlinkxIE
+from .bloomberg import BloombergIE
+from .bokecc import BokeCCIE
+from .bostonglobe import BostonGlobeIE
+from .bpb import BpbIE
+from .br import (
+ BRIE,
+ BRMediathekIE,
+)
+from .bravotv import BravoTVIE
+from .breakcom import BreakIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .businessinsider import BusinessInsiderIE
+from .buzzfeed import BuzzFeedIE
+from .byutv import BYUtvIE
+from .c56 import C56IE
+from .camdemy import (
+ CamdemyIE,
+ CamdemyFolderIE
+)
+from .cammodels import CamModelsIE
+from .camtube import CamTubeIE
+from .camwithher import CamWithHerIE
+from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
+from .canvas import (
+ CanvasIE,
+ CanvasEenIE,
+ VrtNUIE,
+)
+from .carambatv import (
+ CarambaTVIE,
+ CarambaTVPageIE,
+)
+from .cartoonnetwork import CartoonNetworkIE
+from .cbc import (
+ CBCIE,
+ CBCPlayerIE,
+ CBCWatchVideoIE,
+ CBCWatchIE,
+ CBCOlympicsIE,
+)
+from .cbs import CBSIE
+from .cbslocal import CBSLocalIE
+from .cbsinteractive import CBSInteractiveIE
+from .cbsnews import (
+ CBSNewsEmbedIE,
+ CBSNewsIE,
+ CBSNewsLiveVideoIE,
+)
+from .cbssports import CBSSportsIE
+from .ccc import (
+ CCCIE,
+ CCCPlaylistIE,
+)
+from .ccma import CCMAIE
+from .cctv import CCTVIE
+from .cda import CDAIE
+from .ceskatelevize import (
+ CeskaTelevizeIE,
+ CeskaTelevizePoradyIE,
+)
+from .channel9 import Channel9IE
+from .charlierose import CharlieRoseIE
+from .chaturbate import ChaturbateIE
+from .chilloutzone import ChilloutzoneIE
+from .chirbit import (
+ ChirbitIE,
+ ChirbitProfileIE,
+)
+from .cinchcast import CinchcastIE
+from .cinemax import CinemaxIE
+from .ciscolive import (
+ CiscoLiveSessionIE,
+ CiscoLiveSearchIE,
+)
+from .cjsw import CJSWIE
+from .cliphunter import CliphunterIE
+from .clippit import ClippitIE
+from .cliprs import ClipRsIE
+from .clipsyndicate import ClipsyndicateIE
+from .closertotruth import CloserToTruthIE
+from .cloudflarestream import CloudflareStreamIE
+from .cloudy import CloudyIE
+from .clubic import ClubicIE
+from .clyp import ClypIE
+from .cmt import CMTIE
+from .cnbc import (
+ CNBCIE,
+ CNBCVideoIE,
+)
+from .cnn import (
+ CNNIE,
+ CNNBlogsIE,
+ CNNArticleIE,
+)
+from .coub import CoubIE
+from .comedycentral import (
+ ComedyCentralFullEpisodesIE,
+ ComedyCentralIE,
+ ComedyCentralShortnameIE,
+ ComedyCentralTVIE,
+ ToshIE,
+)
+from .commonmistakes import CommonMistakesIE, UnicodeBOMIE
+from .commonprotocols import (
+ MmsIE,
+ RtmpIE,
+)
+from .condenast import CondeNastIE
+from .contv import CONtvIE
+from .corus import CorusIE
+from .cracked import CrackedIE
+from .crackle import CrackleIE
+from .crooksandliars import CrooksAndLiarsIE
+from .crunchyroll import (
+ CrunchyrollIE,
+ CrunchyrollShowPlaylistIE
+)
+from .cspan import CSpanIE
+from .ctsnews import CtsNewsIE
+from .ctvnews import CTVNewsIE
+from .cultureunplugged import CultureUnpluggedIE
+from .curiositystream import (
+ CuriosityStreamIE,
+ CuriosityStreamCollectionIE,
+)
+from .cwtv import CWTVIE
+from .dailymail import DailyMailIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionPlaylistIE,
+ DailymotionUserIE,
+)
+from .daum import (
+ DaumIE,
+ DaumClipIE,
+ DaumPlaylistIE,
+ DaumUserIE,
+)
+from .dbtv import DBTVIE
+from .dctp import DctpTvIE
+from .deezer import DeezerPlaylistIE
+from .democracynow import DemocracynowIE
+from .dfb import DFBIE
+from .dhm import DHMIE
+from .digg import DiggIE
+from .dotsub import DotsubIE
+from .douyutv import (
+ DouyuShowIE,
+ DouyuTVIE,
+)
+from .dplay import DPlayIE
+from .drbonanza import DRBonanzaIE
+from .drtuber import DrTuberIE
+from .drtv import (
+ DRTVIE,
+ DRTVLiveIE,
+)
+from .dtube import DTubeIE
+from .dvtv import DVTVIE
+from .dumpert import DumpertIE
+from .defense import DefenseGouvFrIE
+from .discovery import DiscoveryIE
+from .discoverygo import (
+ DiscoveryGoIE,
+ DiscoveryGoPlaylistIE,
+)
+from .discoverynetworks import DiscoveryNetworksDeIE
+from .discoveryvr import DiscoveryVRIE
+from .disney import DisneyIE
+from .dispeak import DigitallySpeakingIE
+from .doodstream import DoodStreamIE
+from .dropbox import DropboxIE
+from .dw import (
+ DWIE,
+ DWArticleIE,
+)
+from .eagleplatform import EaglePlatformIE
+from .ebaumsworld import EbaumsWorldIE
+from .echomsk import EchoMskIE
+from .egghead import (
+ EggheadCourseIE,
+ EggheadLessonIE,
+)
+from .ehow import EHowIE
+from .eighttracks import EightTracksIE
+from .einthusan import EinthusanIE
+from .eitb import EitbIE
+from .ellentube import (
+ EllenTubeIE,
+ EllenTubeVideoIE,
+ EllenTubePlaylistIE,
+)
+from .elpais import ElPaisIE
+from .embedly import EmbedlyIE
+from .engadget import EngadgetIE
+from .eporner import EpornerIE
+from .eroprofile import EroProfileIE
+from .escapist import EscapistIE
+from .espn import (
+ ESPNIE,
+ ESPNArticleIE,
+ FiveThirtyEightIE,
+)
+from .esri import EsriVideoIE
+from .europa import EuropaIE
+from .everyonesmixtape import EveryonesMixtapeIE
+from .expotv import ExpoTVIE
+from .expressen import ExpressenIE
+from .extremetube import ExtremeTubeIE
+from .eyedotv import EyedoTVIE
+from .facebook import (
+ FacebookIE,
+ FacebookPluginsVideoIE,
+)
+from .faz import FazIE
+from .fc2 import (
+ FC2IE,
+ FC2EmbedIE,
+)
+from .fczenit import FczenitIE
+from .filmon import (
+ FilmOnIE,
+ FilmOnChannelIE,
+)
+from .filmweb import FilmwebIE
+from .firsttv import FirstTVIE
+from .fivemin import FiveMinIE
+from .fivetv import FiveTVIE
+from .flickr import FlickrIE
+from .folketinget import FolketingetIE
+from .footyroom import FootyRoomIE
+from .formula1 import Formula1IE
+from .fourtube import (
+ FourTubeIE,
+ PornTubeIE,
+ PornerBrosIE,
+ FuxIE,
+)
+from .fox import FOXIE
+from .fox9 import (
+ FOX9IE,
+ FOX9NewsIE,
+)
+from .foxgay import FoxgayIE
+from .foxnews import (
+ FoxNewsIE,
+ FoxNewsArticleIE,
+)
+from .foxsports import FoxSportsIE
+from .franceculture import FranceCultureIE
+from .franceinter import FranceInterIE
+from .francetv import (
+ FranceTVIE,
+ FranceTVSiteIE,
+ FranceTVEmbedIE,
+ FranceTVInfoIE,
+ FranceTVInfoSportIE,
+ FranceTVJeunesseIE,
+ GenerationWhatIE,
+ CultureboxIE,
+)
+from .freesound import FreesoundIE
+from .freespeech import FreespeechIE
+from .freshlive import FreshLiveIE
+from .frontendmasters import (
+ FrontendMastersIE,
+ FrontendMastersLessonIE,
+ FrontendMastersCourseIE
+)
+from .funimation import FunimationIE
+from .funk import FunkIE
+from .fusion import FusionIE
+from .fxnetworks import FXNetworksIE
+from .gaia import GaiaIE
+from .gameinformer import GameInformerIE
+from .gamespot import GameSpotIE
+from .gamestar import GameStarIE
+from .gaskrank import GaskrankIE
+from .gazeta import GazetaIE
+from .gdcvault import GDCVaultIE
+from .generic import GenericIE
+from .gfycat import GfycatIE
+from .giantbomb import GiantBombIE
+from .giga import GigaIE
+from .glide import GlideIE
+from .globo import (
+ GloboIE,
+ GloboArticleIE,
+)
+from .go import GoIE
+from .godtube import GodTubeIE
+from .golem import GolemIE
+from .googledrive import GoogleDriveIE
+from .googleplus import GooglePlusIE
+from .googlesearch import GoogleSearchIE
+from .goshgay import GoshgayIE
+from .gputechconf import GPUTechConfIE
+from .groupon import GrouponIE
+from .hbo import HBOIE
+from .hearthisat import HearThisAtIE
+from .heise import HeiseIE
+from .hellporno import HellPornoIE
+from .helsinki import HelsinkiIE
+from .hentaistigma import HentaiStigmaIE
+from .hgtv import HGTVComShowIE
+from .hketv import HKETVIE
+from .hidive import HiDiveIE
+from .historicfilms import HistoricFilmsIE
+from .hitbox import HitboxIE, HitboxLiveIE
+from .hitrecord import HitRecordIE
+from .hornbunny import HornBunnyIE
+from .hotnewhiphop import HotNewHipHopIE
+from .hotstar import (
+ HotStarIE,
+ HotStarPlaylistIE,
+)
+from .howcast import HowcastIE
+from .howstuffworks import HowStuffWorksIE
+from .hrfensehen import HRFernsehenIE
+from .hrti import (
+ HRTiIE,
+ HRTiPlaylistIE,
+)
+from .huajiao import HuajiaoIE
+from .huffpost import HuffPostIE
+from .hungama import (
+ HungamaIE,
+ HungamaSongIE,
+)
+from .hypem import HypemIE
+from .ign import (
+ IGNIE,
+ OneUPIE,
+ PCMagIE,
+)
+from .imdb import (
+ ImdbIE,
+ ImdbListIE
+)
+from .imgur import (
+ ImgurIE,
+ ImgurAlbumIE,
+ ImgurGalleryIE,
+)
+from .ina import InaIE
+from .inc import IncIE
+from .indavideo import IndavideoEmbedIE
+from .infoq import InfoQIE
+from .instagram import (
+ InstagramIE,
+ InstagramUserIE,
+ InstagramTagIE,
+)
+from .internazionale import InternazionaleIE
+from .internetvideoarchive import InternetVideoArchiveIE
+from .iprima import IPrimaIE
+from .iqiyi import IqiyiIE
+from .ir90tv import Ir90TvIE
+from .itv import (
+ ITVIE,
+ ITVBTCCIE,
+)
+from .ivi import (
+ IviIE,
+ IviCompilationIE
+)
+from .ivideon import IvideonIE
+from .iwara import IwaraIE
+from .izlesene import IzleseneIE
+from .jamendo import (
+ JamendoIE,
+ JamendoAlbumIE,
+)
+from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
+from .joj import JojIE
+from .jwplatform import JWPlatformIE
+from .kakao import KakaoIE
+from .kaltura import KalturaIE
+from .kanalplay import KanalPlayIE
+from .kankan import KankanIE
+from .karaoketv import KaraoketvIE
+from .karrierevideos import KarriereVideosIE
+from .keezmovies import KeezMoviesIE
+from .ketnet import KetnetIE
+from .khanacademy import KhanAcademyIE
+from .kickstarter import KickStarterIE
+from .kinja import KinjaEmbedIE
+from .kinopoisk import KinoPoiskIE
+from .konserthusetplay import KonserthusetPlayIE
+from .krasview import KrasViewIE
+from .ku6 import Ku6IE
+from .kusi import KUSIIE
+from .kuwo import (
+ KuwoIE,
+ KuwoAlbumIE,
+ KuwoChartIE,
+ KuwoSingerIE,
+ KuwoCategoryIE,
+ KuwoMvIE,
+)
+from .la7 import LA7IE
+from .laola1tv import (
+ Laola1TvEmbedIE,
+ Laola1TvIE,
+ EHFTVIE,
+ ITTFIE,
+)
+from .lci import LCIIE
+from .lcp import (
+ LcpPlayIE,
+ LcpIE,
+)
+from .lecture2go import Lecture2GoIE
+from .lecturio import (
+ LecturioIE,
+ LecturioCourseIE,
+ LecturioDeCourseIE,
+)
+from .leeco import (
+ LeIE,
+ LePlaylistIE,
+ LetvCloudIE,
+)
+from .lego import LEGOIE
+from .lemonde import LemondeIE
+from .lenta import LentaIE
+from .libraryofcongress import LibraryOfCongressIE
+from .libsyn import LibsynIE
+from .lifenews import (
+ LifeNewsIE,
+ LifeEmbedIE,
+)
+from .limelight import (
+ LimelightMediaIE,
+ LimelightChannelIE,
+ LimelightChannelListIE,
+)
+from .line import LineTVIE
+from .linkedin import (
+ LinkedInLearningIE,
+ LinkedInLearningCourseIE,
+)
+from .linuxacademy import LinuxAcademyIE
+from .litv import LiTVIE
+from .livejournal import LiveJournalIE
+from .liveleak import (
+ LiveLeakIE,
+ LiveLeakEmbedIE,
+)
+from .livestream import (
+ LivestreamIE,
+ LivestreamOriginalIE,
+ LivestreamShortenerIE,
+)
+from .lnkgo import LnkGoIE
+from .localnews8 import LocalNews8IE
+from .lovehomeporn import LoveHomePornIE
+from .lrt import LRTIE
+from .lynda import (
+ LyndaIE,
+ LyndaCourseIE
+)
+from .m6 import M6IE
+from .mailru import (
+ MailRuIE,
+ MailRuMusicIE,
+ MailRuMusicSearchIE,
+)
+from .malltv import MallTVIE
+from .mangomolo import (
+ MangomoloVideoIE,
+ MangomoloLiveIE,
+)
+from .manyvids import ManyVidsIE
+from .markiza import (
+ MarkizaIE,
+ MarkizaPageIE,
+)
+from .massengeschmacktv import MassengeschmackTVIE
+from .matchtv import MatchTVIE
+from .mdr import MDRIE
+from .mediaset import MediasetIE
+from .mediasite import (
+ MediasiteIE,
+ MediasiteCatalogIE,
+ MediasiteNamedCatalogIE,
+)
+from .medici import MediciIE
+from .megaphone import MegaphoneIE
+from .meipai import MeipaiIE
+from .melonvod import MelonVODIE
+from .meta import METAIE
+from .metacafe import MetacafeIE
+from .metacritic import MetacriticIE
+from .mgoon import MgoonIE
+from .mgtv import MGTVIE
+from .miaopai import MiaoPaiIE
+from .microsoftvirtualacademy import (
+ MicrosoftVirtualAcademyIE,
+ MicrosoftVirtualAcademyCourseIE,
+)
+from .ministrygrid import MinistryGridIE
+from .minoto import MinotoIE
+from .miomio import MioMioIE
+from .mit import TechTVMITIE, OCWMITIE
+from .mitele import MiTeleIE
+from .mixcloud import (
+ MixcloudIE,
+ MixcloudUserIE,
+ MixcloudPlaylistIE,
+)
+from .mlb import MLBIE
+from .mnet import MnetIE
+from .moevideo import MoeVideoIE
+from .mofosex import (
+ MofosexIE,
+ MofosexEmbedIE,
+)
+from .mojvideo import MojvideoIE
+from .morningstar import MorningstarIE
+from .motherless import (
+ MotherlessIE,
+ MotherlessGroupIE
+)
+from .motorsport import MotorsportIE
+from .movieclips import MovieClipsIE
+from .moviezine import MoviezineIE
+from .movingimage import MovingImageIE
+from .msn import MSNIE
+from .mtv import (
+ MTVIE,
+ MTVVideoIE,
+ MTVServicesEmbeddedIE,
+ MTVDEIE,
+ MTVJapanIE,
+)
+from .muenchentv import MuenchenTVIE
+from .mwave import MwaveIE, MwaveMeetGreetIE
+from .mychannels import MyChannelsIE
+from .myspace import MySpaceIE, MySpaceAlbumIE
+from .myspass import MySpassIE
+from .myvi import (
+ MyviIE,
+ MyviEmbedIE,
+)
+from .myvidster import MyVidsterIE
+from .nationalgeographic import (
+ NationalGeographicVideoIE,
+ NationalGeographicTVIE,
+)
+from .naver import NaverIE
+from .nba import NBAIE
+from .nbc import (
+ CSNNEIE,
+ NBCIE,
+ NBCNewsIE,
+ NBCOlympicsIE,
+ NBCOlympicsStreamIE,
+ NBCSportsIE,
+ NBCSportsStreamIE,
+ NBCSportsVPlayerIE,
+)
+from .ndr import (
+ NDRIE,
+ NJoyIE,
+ NDREmbedBaseIE,
+ NDREmbedIE,
+ NJoyEmbedIE,
+)
+from .ndtv import NDTVIE
+from .netzkino import NetzkinoIE
+from .nerdcubed import NerdCubedFeedIE
+from .neteasemusic import (
+ NetEaseMusicIE,
+ NetEaseMusicAlbumIE,
+ NetEaseMusicSingerIE,
+ NetEaseMusicListIE,
+ NetEaseMusicMvIE,
+ NetEaseMusicProgramIE,
+ NetEaseMusicDjRadioIE,
+)
+from .newgrounds import (
+ NewgroundsIE,
+ NewgroundsPlaylistIE,
+)
+from .newstube import NewstubeIE
+from .nextmedia import (
+ NextMediaIE,
+ NextMediaActionNewsIE,
+ AppleDailyIE,
+ NextTVIE,
+)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
+from .nfl import NFLIE
+from .nhk import NhkVodIE
+from .nhl import NHLIE
+from .nick import (
+ NickIE,
+ NickBrIE,
+ NickDeIE,
+ NickNightIE,
+ NickRuIE,
+)
+from .niconico import NiconicoIE, NiconicoPlaylistIE
+from .ninecninemedia import NineCNineMediaIE
+from .ninegag import NineGagIE
+from .ninenow import NineNowIE
+from .nintendo import NintendoIE
+from .njpwworld import NJPWWorldIE
+from .nobelprize import NobelPrizeIE
+from .noco import NocoIE
+from .nonktube import NonkTubeIE
+from .noovo import NoovoIE
+from .normalboots import NormalbootsIE
+from .nosvideo import NosVideoIE
+from .nova import (
+ NovaEmbedIE,
+ NovaIE,
+)
+from .nowness import (
+ NownessIE,
+ NownessPlaylistIE,
+ NownessSeriesIE,
+)
+from .noz import NozIE
+from .npo import (
+ AndereTijdenIE,
+ NPOIE,
+ NPOLiveIE,
+ NPORadioIE,
+ NPORadioFragmentIE,
+ SchoolTVIE,
+ HetKlokhuisIE,
+ VPROIE,
+ WNLIE,
+)
+from .npr import NprIE
+from .nrk import (
+ NRKIE,
+ NRKPlaylistIE,
+ NRKSkoleIE,
+ NRKTVIE,
+ NRKTVDirekteIE,
+ NRKTVEpisodeIE,
+ NRKTVEpisodesIE,
+ NRKTVSeasonIE,
+ NRKTVSeriesIE,
+)
+from .nrl import NRLTVIE
+from .ntvcojp import NTVCoJpCUIE
+from .ntvde import NTVDeIE
+from .ntvru import NTVRuIE
+from .nytimes import (
+ NYTimesIE,
+ NYTimesArticleIE,
+)
+from .nuvid import NuvidIE
+from .nzz import NZZIE
+from .odatv import OdaTVIE
+from .odnoklassniki import OdnoklassnikiIE
+from .oktoberfesttv import OktoberfestTVIE
+from .ondemandkorea import OnDemandKoreaIE
+from .onet import (
+ OnetIE,
+ OnetChannelIE,
+ OnetMVPIE,
+ OnetPlIE,
+)
+from .onionstudios import OnionStudiosIE
+from .ooyala import (
+ OoyalaIE,
+ OoyalaExternalIE,
+)
+from .ora import OraTVIE
+from .orf import (
+ ORFTVthekIE,
+ ORFFM4IE,
+ ORFFM4StoryIE,
+ ORFOE1IE,
+ ORFOE3IE,
+ ORFNOEIE,
+ ORFWIEIE,
+ ORFBGLIE,
+ ORFOOEIE,
+ ORFSTMIE,
+ ORFKTNIE,
+ ORFSBGIE,
+ ORFTIRIE,
+ ORFVBGIE,
+ ORFIPTVIE,
+)
+from .outsidetv import OutsideTVIE
+from .packtpub import (
+ PacktPubIE,
+ PacktPubCourseIE,
+)
+from .pandoratv import PandoraTVIE
+from .parliamentliveuk import ParliamentLiveUKIE
+from .patreon import PatreonIE
+from .pbs import PBSIE
+from .pearvideo import PearVideoIE
+from .peertube import PeerTubeIE
+from .people import PeopleIE
+from .performgroup import PerformGroupIE
+from .periscope import (
+ PeriscopeIE,
+ PeriscopeUserIE,
+)
+from .philharmoniedeparis import PhilharmonieDeParisIE
+from .phoenix import PhoenixIE
+from .photobucket import PhotobucketIE
+from .picarto import (
+ PicartoIE,
+ PicartoVodIE,
+)
+from .piksel import PikselIE
+from .pinkbike import PinkbikeIE
+from .pladform import PladformIE
+from .platzi import (
+ PlatziIE,
+ PlatziCourseIE,
+)
+from .playfm import PlayFMIE
+from .playplustv import PlayPlusTVIE
+from .plays import PlaysTVIE
+from .playtvak import PlaytvakIE
+from .playvid import PlayvidIE
+from .playwire import PlaywireIE
+from .pluralsight import (
+ PluralsightIE,
+ PluralsightCourseIE,
+)
+from .podomatic import PodomaticIE
+from .pokemon import (
+ PokemonIE,
+ PokemonWatchIE,
+)
+from .polskieradio import (
+ PolskieRadioIE,
+ PolskieRadioCategoryIE,
+)
+from .popcorntimes import PopcorntimesIE
+from .popcorntv import PopcornTVIE
+from .porn91 import Porn91IE
+from .porncom import PornComIE
+from .pornhd import PornHdIE
+from .pornhub import (
+ PornHubIE,
+ PornHubUserIE,
+ PornHubPagedVideoListIE,
+ PornHubUserVideosUploadIE,
+)
+from .pornotube import PornotubeIE
+from .pornovoisines import PornoVoisinesIE
+from .pornoxo import PornoXOIE
+from .puhutv import (
+ PuhuTVIE,
+ PuhuTVSerieIE,
+)
+from .presstv import PressTVIE
+from .prosiebensat1 import ProSiebenSat1IE
+from .puls4 import Puls4IE
+from .pyvideo import PyvideoIE
+from .qqmusic import (
+ QQMusicIE,
+ QQMusicSingerIE,
+ QQMusicAlbumIE,
+ QQMusicToplistIE,
+ QQMusicPlaylistIE,
+)
+from .r7 import (
+ R7IE,
+ R7ArticleIE,
+)
+from .radiocanada import (
+ RadioCanadaIE,
+ RadioCanadaAudioVideoIE,
+)
+from .radiode import RadioDeIE
+from .radiojavan import RadioJavanIE
+from .radiobremen import RadioBremenIE
+from .radiofrance import RadioFranceIE
+from .rai import (
+ RaiPlayIE,
+ RaiPlayLiveIE,
+ RaiPlayPlaylistIE,
+ RaiIE,
+)
+from .raywenderlich import (
+ RayWenderlichIE,
+ RayWenderlichCourseIE,
+)
+from .rbmaradio import RBMARadioIE
+from .rds import RDSIE
+from .redbulltv import (
+ RedBullTVIE,
+ RedBullTVRrnContentIE,
+)
+from .reddit import (
+ RedditIE,
+ RedditRIE,
+)
+from .redtube import RedTubeIE
+from .regiotv import RegioTVIE
+from .rentv import (
+ RENTVIE,
+ RENTVArticleIE,
+)
+from .restudy import RestudyIE
+from .reuters import ReutersIE
+from .reverbnation import ReverbNationIE
+from .rice import RICEIE
+from .rmcdecouverte import RMCDecouverteIE
+from .ro220 import Ro220IE
+from .rockstargames import RockstarGamesIE
+from .roosterteeth import RoosterTeethIE
+from .rottentomatoes import RottenTomatoesIE
+from .roxwel import RoxwelIE
+from .rozhlas import RozhlasIE
+from .rtbf import RTBFIE
+from .rte import RteIE, RteRadioIE
+from .rtlnl import RtlNlIE
+from .rtl2 import (
+ RTL2IE,
+ RTL2YouIE,
+ RTL2YouSeriesIE,
+)
+from .rtp import RTPIE
+from .rts import RTSIE
+from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVETelevisionIE
+from .rtvnh import RTVNHIE
+from .rtvs import RTVSIE
+from .ruhd import RUHDIE
+from .rutube import (
+ RutubeIE,
+ RutubeChannelIE,
+ RutubeEmbedIE,
+ RutubeMovieIE,
+ RutubePersonIE,
+ RutubePlaylistIE,
+)
+from .rutv import RUTVIE
+from .ruutu import RuutuIE
+from .ruv import RuvIE
+from .safari import (
+ SafariIE,
+ SafariApiIE,
+ SafariCourseIE,
+)
+from .sapo import SapoIE
+from .savefrom import SaveFromIE
+from .sbs import SBSIE
+from .screencast import ScreencastIE
+from .screencastomatic import ScreencastOMaticIE
+from .scrippsnetworks import (
+ ScrippsNetworksWatchIE,
+ ScrippsNetworksIE,
+)
+from .scte import (
+ SCTEIE,
+ SCTECourseIE,
+)
+from .seeker import SeekerIE
+from .senateisvp import SenateISVPIE
+from .sendtonews import SendtoNewsIE
+from .servus import ServusIE
+from .sevenplus import SevenPlusIE
+from .sexu import SexuIE
+from .seznamzpravy import (
+ SeznamZpravyIE,
+ SeznamZpravyArticleIE,
+)
+from .shahid import (
+ ShahidIE,
+ ShahidShowIE,
+)
+from .shared import (
+ SharedIE,
+ VivoIE,
+)
+from .showroomlive import ShowRoomLiveIE
+from .sina import SinaIE
+from .sixplay import SixPlayIE
+from .skylinewebcams import SkylineWebcamsIE
+from .skynewsarabia import (
+ SkyNewsArabiaIE,
+ SkyNewsArabiaArticleIE,
+)
+from .sky import (
+ SkyNewsIE,
+ SkySportsIE,
+)
+from .slideshare import SlideshareIE
+from .slideslive import SlidesLiveIE
+from .slutload import SlutloadIE
+from .smotri import (
+ SmotriIE,
+ SmotriCommunityIE,
+ SmotriUserIE,
+ SmotriBroadcastIE,
+)
+from .snotr import SnotrIE
+from .sohu import SohuIE
+from .sonyliv import SonyLIVIE
+from .soundcloud import (
+ SoundcloudEmbedIE,
+ SoundcloudIE,
+ SoundcloudSetIE,
+ SoundcloudUserIE,
+ SoundcloudTrackStationIE,
+ SoundcloudPlaylistIE,
+ SoundcloudSearchIE,
+)
+from .soundgasm import (
+ SoundgasmIE,
+ SoundgasmProfileIE
+)
+from .southpark import (
+ SouthParkIE,
+ SouthParkDeIE,
+ SouthParkDkIE,
+ SouthParkEsIE,
+ SouthParkNlIE
+)
+from .spankbang import (
+ SpankBangIE,
+ SpankBangPlaylistIE,
+)
+from .spankwire import SpankwireIE
+from .spiegel import SpiegelIE, SpiegelArticleIE
+from .spiegeltv import SpiegeltvIE
+from .spike import (
+ BellatorIE,
+ ParamountNetworkIE,
+)
+from .storyfire import (
+ StoryFireIE,
+ StoryFireUserIE,
+ StoryFireSeriesIE,
+)
+from .stitcher import StitcherIE
+from .sport5 import Sport5IE
+from .sportbox import SportBoxIE
+from .sportdeutschland import SportDeutschlandIE
+from .springboardplatform import SpringboardPlatformIE
+from .sprout import SproutIE
+from .srgssr import (
+ SRGSSRIE,
+ SRGSSRPlayIE,
+)
+from .srmediathek import SRMediathekIE
+from .stanfordoc import StanfordOpenClassroomIE
+from .steam import SteamIE
+from .streamable import StreamableIE
+from .streamcloud import StreamcloudIE
+from .streamcz import StreamCZIE
+from .streetvoice import StreetVoiceIE
+from .stretchinternet import StretchInternetIE
+from .stv import STVPlayerIE
+from .sunporno import SunPornoIE
+from .sverigesradio import (
+ SverigesRadioEpisodeIE,
+ SverigesRadioPublicationIE,
+)
+from .svt import (
+ SVTIE,
+ SVTPageIE,
+ SVTPlayIE,
+ SVTSeriesIE,
+)
+from .swrmediathek import SWRMediathekIE
+from .syfy import SyfyIE
+from .sztvhu import SztvHuIE
+from .tagesschau import (
+ TagesschauPlayerIE,
+ TagesschauIE,
+)
+from .tass import TassIE
+from .tastytrade import TastyTradeIE
+from .tbs import TBSIE
+from .tdslifeway import TDSLifewayIE
+from .teachable import (
+ TeachableIE,
+ TeachableCourseIE,
+)
+from .teachertube import (
+ TeacherTubeIE,
+ TeacherTubeUserIE,
+)
+from .teachingchannel import TeachingChannelIE
+from .teamcoco import TeamcocoIE
+from .teamtreehouse import TeamTreeHouseIE
+from .techtalks import TechTalksIE
+from .ted import TEDIE
+from .tele5 import Tele5IE
+from .tele13 import Tele13IE
+from .telebruxelles import TeleBruxellesIE
+from .telecinco import TelecincoIE
+from .telegraaf import TelegraafIE
+from .telemb import TeleMBIE
+from .telequebec import (
+ TeleQuebecIE,
+ TeleQuebecSquatIE,
+ TeleQuebecEmissionIE,
+ TeleQuebecLiveIE,
+)
+from .teletask import TeleTaskIE
+from .telewebion import TelewebionIE
+from .tennistv import TennisTVIE
+from .tenplay import TenPlayIE
+from .testurl import TestURLIE
+from .tf1 import TF1IE
+from .tfo import TFOIE
+from .theintercept import TheInterceptIE
+from .theplatform import (
+ ThePlatformIE,
+ ThePlatformFeedIE,
+)
+from .thescene import TheSceneIE
+from .thestar import TheStarIE
+from .thesun import TheSunIE
+from .theweatherchannel import TheWeatherChannelIE
+from .thisamericanlife import ThisAmericanLifeIE
+from .thisav import ThisAVIE
+from .thisoldhouse import ThisOldHouseIE
+from .threeqsdn import ThreeQSDNIE
+from .tiktok import (
+ TikTokIE,
+ TikTokUserIE,
+)
+from .tinypic import TinyPicIE
+from .tmz import (
+ TMZIE,
+ TMZArticleIE,
+)
+from .tnaflix import (
+ TNAFlixNetworkEmbedIE,
+ TNAFlixIE,
+ EMPFlixIE,
+ MovieFapIE,
+)
+from .toggle import ToggleIE
+from .tonline import TOnlineIE
+from .toongoggles import ToonGogglesIE
+from .toutv import TouTvIE
+from .toypics import ToypicsUserIE, ToypicsIE
+from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
+from .trunews import TruNewsIE
+from .trutv import TruTVIE
+from .tube8 import Tube8IE
+from .tubitv import TubiTvIE
+from .tumblr import TumblrIE
+from .tunein import (
+ TuneInClipIE,
+ TuneInStationIE,
+ TuneInProgramIE,
+ TuneInTopicIE,
+ TuneInShortenerIE,
+)
+from .tunepk import TunePkIE
+from .turbo import TurboIE
+from .tv2 import (
+ TV2IE,
+ TV2ArticleIE,
+ KatsomoIE,
+)
+from .tv2dk import (
+ TV2DKIE,
+ TV2DKBornholmPlayIE,
+)
+from .tv2hu import TV2HuIE
+from .tv4 import TV4IE
+from .tv5mondeplus import TV5MondePlusIE
+from .tva import TVAIE
+from .tvanouvelles import (
+ TVANouvellesIE,
+ TVANouvellesArticleIE,
+)
+from .tvc import (
+ TVCIE,
+ TVCArticleIE,
+)
+from .tvigle import TvigleIE
+from .tvland import TVLandIE
+from .tvn24 import TVN24IE
+from .tvnet import TVNetIE
+from .tvnoe import TVNoeIE
+from .tvnow import (
+ TVNowIE,
+ TVNowFilmIE,
+ TVNowNewIE,
+ TVNowSeasonIE,
+ TVNowAnnualIE,
+ TVNowShowIE,
+)
+from .tvp import (
+ TVPEmbedIE,
+ TVPIE,
+ TVPWebsiteIE,
+)
+from .tvplay import (
+ TVPlayIE,
+ ViafreeIE,
+ TVPlayHomeIE,
+)
+from .tvplayer import TVPlayerIE
+from .tweakers import TweakersIE
+from .twentyfourvideo import TwentyFourVideoIE
+from .twentymin import TwentyMinutenIE
+from .twentythreevideo import TwentyThreeVideoIE
+from .twitcasting import TwitCastingIE
+from .twitch import (
+ TwitchVideoIE,
+ TwitchChapterIE,
+ TwitchVodIE,
+ TwitchProfileIE,
+ TwitchAllVideosIE,
+ TwitchUploadsIE,
+ TwitchPastBroadcastsIE,
+ TwitchHighlightsIE,
+ TwitchStreamIE,
+ TwitchClipsIE,
+)
+from .twitter import (
+ TwitterCardIE,
+ TwitterIE,
+ TwitterAmplifyIE,
+ TwitterBroadcastIE,
+)
+from .udemy import (
+ UdemyIE,
+ UdemyCourseIE
+)
+from .udn import UDNEmbedIE
+from .ufctv import (
+ UFCTVIE,
+ UFCArabiaIE,
+)
+from .uktvplay import UKTVPlayIE
+from .digiteka import DigitekaIE
+from .dlive import (
+ DLiveVODIE,
+ DLiveStreamIE,
+)
+from .umg import UMGDeIE
+from .unistra import UnistraIE
+from .unity import UnityIE
+from .uol import UOLIE
+from .uplynk import (
+ UplynkIE,
+ UplynkPreplayIE,
+)
+from .urort import UrortIE
+from .urplay import URPlayIE
+from .usanetwork import USANetworkIE
+from .usatoday import USATodayIE
+from .ustream import UstreamIE, UstreamChannelIE
+from .ustudio import (
+ UstudioIE,
+ UstudioEmbedIE,
+)
+from .varzesh3 import Varzesh3IE
+from .vbox7 import Vbox7IE
+from .veehd import VeeHDIE
+from .veoh import VeohIE
+from .vesti import VestiIE
+from .vevo import (
+ VevoIE,
+ VevoPlaylistIE,
+)
+from .vgtv import (
+ BTArticleIE,
+ BTVestlendingenIE,
+ VGTVIE,
+)
+from .vh1 import VH1IE
+from .vice import (
+ ViceIE,
+ ViceArticleIE,
+ ViceShowIE,
+)
+from .vidbit import VidbitIE
+from .viddler import ViddlerIE
+from .videa import VideaIE
+from .videodetective import VideoDetectiveIE
+from .videofyme import VideofyMeIE
+from .videomore import (
+ VideomoreIE,
+ VideomoreVideoIE,
+ VideomoreSeasonIE,
+)
+from .videopress import VideoPressIE
+from .vidio import VidioIE
+from .vidlii import VidLiiIE
+from .vidme import (
+ VidmeIE,
+ VidmeUserIE,
+ VidmeUserLikesIE,
+)
+from .vidzi import VidziIE
+from .vier import VierIE, VierVideosIE
+from .viewlift import (
+ ViewLiftIE,
+ ViewLiftEmbedIE,
+)
+from .viidea import ViideaIE
+from .vimeo import (
+ VimeoIE,
+ VimeoAlbumIE,
+ VimeoChannelIE,
+ VimeoGroupsIE,
+ VimeoLikesIE,
+ VimeoOndemandIE,
+ VimeoReviewIE,
+ VimeoUserIE,
+ VimeoWatchLaterIE,
+ VHXEmbedIE,
+)
+from .vimple import VimpleIE
+from .vine import (
+ VineIE,
+ VineUserIE,
+)
+from .viki import (
+ VikiIE,
+ VikiChannelIE,
+)
+from .viqeo import ViqeoIE
+from .viu import (
+ ViuIE,
+ ViuPlaylistIE,
+ ViuOTTIE,
+)
+from .vk import (
+ VKIE,
+ VKUserVideosIE,
+ VKWallPostIE,
+)
+from .vlive import (
+ VLiveIE,
+ VLiveChannelIE,
+ VLivePlaylistIE
+)
+from .vodlocker import VodlockerIE
+from .vodpl import VODPlIE
+from .vodplatform import VODPlatformIE
+from .voicerepublic import VoiceRepublicIE
+from .voot import VootIE
+from .voxmedia import (
+ VoxMediaVolumeIE,
+ VoxMediaIE,
+)
+from .vrt import VRTIE
+from .vrak import VrakIE
+from .vrv import (
+ VRVIE,
+ VRVSeriesIE,
+)
+from .vshare import VShareIE
+from .medialaan import MedialaanIE
+from .vube import VubeIE
+from .vuclip import VuClipIE
+from .vvvvid import VVVVIDIE
+from .vyborymos import VyboryMosIE
+from .vzaar import VzaarIE
+from .wakanim import WakanimIE
+from .walla import WallaIE
+from .washingtonpost import (
+ WashingtonPostIE,
+ WashingtonPostArticleIE,
+)
+from .wat import WatIE
+from .watchbox import WatchBoxIE
+from .watchindianporn import WatchIndianPornIE
+from .wdr import (
+ WDRIE,
+ WDRPageIE,
+ WDRElefantIE,
+ WDRMobileIE,
+)
+from .webcaster import (
+ WebcasterIE,
+ WebcasterFeedIE,
+)
+from .webofstories import (
+ WebOfStoriesIE,
+ WebOfStoriesPlaylistIE,
+)
+from .weibo import (
+ WeiboIE,
+ WeiboMobileIE
+)
+from .weiqitv import WeiqiTVIE
+from .wistia import WistiaIE
+from .worldstarhiphop import WorldStarHipHopIE
+from .wsj import (
+ WSJIE,
+ WSJArticleIE,
+)
+from .wwe import WWEIE
+from .xbef import XBefIE
+from .xboxclips import XboxClipsIE
+from .xfileshare import XFileShareIE
+from .xhamster import (
+ XHamsterIE,
+ XHamsterEmbedIE,
+ XHamsterUserIE,
+)
+from .xiami import (
+ XiamiSongIE,
+ XiamiAlbumIE,
+ XiamiArtistIE,
+ XiamiCollectionIE
+)
+from .ximalaya import (
+ XimalayaIE,
+ XimalayaAlbumIE
+)
+from .xminus import XMinusIE
+from .xnxx import XNXXIE
+from .xstream import XstreamIE
+from .xtube import XTubeUserIE, XTubeIE
+from .xuite import XuiteIE
+from .xvideos import XVideosIE
+from .xxxymovies import XXXYMoviesIE
+from .yahoo import (
+ YahooIE,
+ YahooSearchIE,
+ YahooGyaOPlayerIE,
+ YahooGyaOIE,
+ YahooJapanNewsIE,
+)
+from .yandexdisk import YandexDiskIE
+from .yandexmusic import (
+ YandexMusicTrackIE,
+ YandexMusicAlbumIE,
+ YandexMusicPlaylistIE,
+)
+from .yandexvideo import YandexVideoIE
+from .yapfiles import YapFilesIE
+from .yesjapan import YesJapanIE
+from .yinyuetai import YinYueTaiIE
+from .ynet import YnetIE
+from .youjizz import YouJizzIE
+from .youku import (
+ YoukuIE,
+ YoukuShowIE,
+)
+from .younow import (
+ YouNowLiveIE,
+ YouNowChannelIE,
+ YouNowMomentIE,
+)
+from .youporn import YouPornIE
+from .yourporn import YourPornIE
+from .yourupload import YourUploadIE
+from .youtube import (
+ YoutubeIE,
+ YoutubeChannelIE,
+ YoutubeFavouritesIE,
+ YoutubeHistoryIE,
+ YoutubeLiveIE,
+ YoutubePlaylistIE,
+ YoutubePlaylistsIE,
+ YoutubeRecommendedIE,
+ YoutubeSearchDateIE,
+ YoutubeSearchIE,
+ YoutubeSearchURLIE,
+ YoutubeShowIE,
+ YoutubeSubscriptionsIE,
+ YoutubeTruncatedIDIE,
+ YoutubeTruncatedURLIE,
+ YoutubeUserIE,
+ YoutubeWatchLaterIE,
+)
+from .zapiks import ZapiksIE
+from .zaq1 import Zaq1IE
+from .zattoo import (
+ BBVTVIE,
+ EinsUndEinsTVIE,
+ EWETVIE,
+ GlattvisionTVIE,
+ MNetTVIE,
+ MyVisionTVIE,
+ NetPlusIE,
+ OsnatelTVIE,
+ QuantumTVIE,
+ QuicklineIE,
+ QuicklineLiveIE,
+ SaltTVIE,
+ SAKTVIE,
+ VTXTVIE,
+ WalyTVIE,
+ ZattooIE,
+ ZattooLiveIE,
+)
+from .zdf import ZDFIE, ZDFChannelIE
+from .zingmp3 import ZingMp3IE
+from .zype import ZypeIE
diff --git a/youtube_dl/extractor/extremetube.py b/youtube_dlc/extractor/extremetube.py
index acd4090fa..acd4090fa 100644
--- a/youtube_dl/extractor/extremetube.py
+++ b/youtube_dlc/extractor/extremetube.py
diff --git a/youtube_dl/extractor/eyedotv.py b/youtube_dlc/extractor/eyedotv.py
index f62ddebae..f62ddebae 100644
--- a/youtube_dl/extractor/eyedotv.py
+++ b/youtube_dlc/extractor/eyedotv.py
diff --git a/youtube_dlc/extractor/facebook.py b/youtube_dlc/extractor/facebook.py
new file mode 100644
index 000000000..610d66745
--- /dev/null
+++ b/youtube_dlc/extractor/facebook.py
@@ -0,0 +1,514 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import socket
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_fromstring,
+ compat_http_client,
+ compat_urllib_error,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
+)
+from ..utils import (
+ clean_html,
+ error_to_compat_str,
+ ExtractorError,
+ get_element_by_id,
+ int_or_none,
+ js_to_json,
+ limit_length,
+ parse_count,
+ sanitized_Request,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class FacebookIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://
+ (?:[\w-]+\.)?(?:facebook\.com|facebookcorewwwi\.onion)/
+ (?:[^#]*?\#!/)?
+ (?:
+ (?:
+ video/video\.php|
+ photo\.php|
+ video\.php|
+ video/embed|
+ story\.php
+ )\?(?:.*?)(?:v|video_id|story_fbid)=|
+ [^/]+/videos/(?:[^/]+/)?|
+ [^/]+/posts/|
+ groups/[^/]+/permalink/
+ )|
+ facebook:
+ )
+ (?P<id>[0-9]+)
+ '''
+ _LOGIN_URL = 'https://www.facebook.com/login.php?next=http%3A%2F%2Ffacebook.com%2Fhome.php&login_attempt=1'
+ _CHECKPOINT_URL = 'https://www.facebook.com/checkpoint/?next=http%3A%2F%2Ffacebook.com%2Fhome.php&_fb_noscript=1'
+ _NETRC_MACHINE = 'facebook'
+ IE_NAME = 'facebook'
+
+ _CHROME_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
+
+ _VIDEO_PAGE_TEMPLATE = 'https://www.facebook.com/video/video.php?v=%s'
+ _VIDEO_PAGE_TAHOE_TEMPLATE = 'https://www.facebook.com/video/tahoe/async/%s/?chain=true&isvideo=true&payloadtype=primary'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/video.php?v=637842556329505&fref=nf',
+ 'md5': '6a40d33c0eccbb1af76cf0485a052659',
+ 'info_dict': {
+ 'id': '637842556329505',
+ 'ext': 'mp4',
+ 'title': 're:Did you know Kei Nishikori is the first Asian man to ever reach a Grand Slam',
+ 'uploader': 'Tennis on Facebook',
+ 'upload_date': '20140908',
+ 'timestamp': 1410199200,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ 'url': 'https://www.facebook.com/video.php?v=274175099429670',
+ 'info_dict': {
+ 'id': '274175099429670',
+ 'ext': 'mp4',
+ 'title': 're:^Asif Nawab Butt posted a video',
+ 'uploader': 'Asif Nawab Butt',
+ 'upload_date': '20140506',
+ 'timestamp': 1399398998,
+ 'thumbnail': r're:^https?://.*',
+ },
+ 'expected_warnings': [
+ 'title'
+ ]
+ }, {
+ 'note': 'Video with DASH manifest',
+ 'url': 'https://www.facebook.com/video.php?v=957955867617029',
+ 'md5': 'b2c28d528273b323abe5c6ab59f0f030',
+ 'info_dict': {
+ 'id': '957955867617029',
+ 'ext': 'mp4',
+ 'title': 'When you post epic content on instagram.com/433 8 million followers, this is ...',
+ 'uploader': 'Demy de Zeeuw',
+ 'upload_date': '20160110',
+ 'timestamp': 1452431627,
+ },
+ 'skip': 'Requires logging in',
+ }, {
+ 'url': 'https://www.facebook.com/maxlayn/posts/10153807558977570',
+ 'md5': '037b1fa7f3c2d02b7a0d7bc16031ecc6',
+ 'info_dict': {
+ 'id': '544765982287235',
+ 'ext': 'mp4',
+ 'title': '"What are you doing running in the snow?"',
+ 'uploader': 'FailArmy',
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'url': 'https://m.facebook.com/story.php?story_fbid=1035862816472149&id=116132035111903',
+ 'md5': '1deb90b6ac27f7efcf6d747c8a27f5e3',
+ 'info_dict': {
+ 'id': '1035862816472149',
+ 'ext': 'mp4',
+ 'title': 'What the Flock Is Going On In New Zealand Credit: ViralHog',
+ 'uploader': 'S. Saint',
+ },
+ 'skip': 'Video gone',
+ }, {
+ 'note': 'swf params escaped',
+ 'url': 'https://www.facebook.com/barackobama/posts/10153664894881749',
+ 'md5': '97ba073838964d12c70566e0085c2b91',
+ 'info_dict': {
+ 'id': '10153664894881749',
+ 'ext': 'mp4',
+ 'title': 'Average time to confirm recent Supreme Court nominees: 67 days Longest it\'s t...',
+ 'thumbnail': r're:^https?://.*',
+ 'timestamp': 1456259628,
+ 'upload_date': '20160223',
+ 'uploader': 'Barack Obama',
+ },
+ }, {
+ # has 1080p, but only up to 720p in the swf params
+ 'url': 'https://www.facebook.com/cnn/videos/10155529876156509/',
+ 'md5': '9571fae53d4165bbbadb17a94651dcdc',
+ 'info_dict': {
+ 'id': '10155529876156509',
+ 'ext': 'mp4',
+ 'title': 'She survived the holocaust — and years later, she’s getting her citizenship s...',
+ 'timestamp': 1477818095,
+ 'upload_date': '20161030',
+ 'uploader': 'CNN',
+ 'thumbnail': r're:^https?://.*',
+ 'view_count': int,
+ },
+ }, {
+ # bigPipe.onPageletArrive ... onPageletArrive pagelet_group_mall
+ 'url': 'https://www.facebook.com/yaroslav.korpan/videos/1417995061575415/',
+ 'info_dict': {
+ 'id': '1417995061575415',
+ 'ext': 'mp4',
+ 'title': 'md5:1db063d6a8c13faa8da727817339c857',
+ 'timestamp': 1486648217,
+ 'upload_date': '20170209',
+ 'uploader': 'Yaroslav Korpan',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.facebook.com/LaGuiaDelVaron/posts/1072691702860471',
+ 'info_dict': {
+ 'id': '1072691702860471',
+ 'ext': 'mp4',
+ 'title': 'md5:ae2d22a93fbb12dad20dc393a869739d',
+ 'timestamp': 1477305000,
+ 'upload_date': '20161024',
+ 'uploader': 'La Guía Del Varón',
+ 'thumbnail': r're:^https?://.*',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.facebook.com/groups/1024490957622648/permalink/1396382447100162/',
+ 'info_dict': {
+ 'id': '1396382447100162',
+ 'ext': 'mp4',
+ 'title': 'md5:19a428bbde91364e3de815383b54a235',
+ 'timestamp': 1486035494,
+ 'upload_date': '20170202',
+ 'uploader': 'Elisabeth Ahtn',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.facebook.com/video.php?v=10204634152394104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/amogood/videos/1618742068337349/?fref=nf',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/ChristyClarkForBC/videos/vb.22819070941/10153870694020942/?type=2&theater',
+ 'only_matching': True,
+ }, {
+ 'url': 'facebook:544765982287235',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/groups/164828000315060/permalink/764967300301124/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zh-hk.facebook.com/peoplespower/videos/1135894589806027/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebookcorewwwi.onion/video.php?v=274175099429670',
+ 'only_matching': True,
+ }, {
+ # no title
+ 'url': 'https://www.facebook.com/onlycleverentertainment/videos/1947995502095005/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/WatchESLOne/videos/359649331226507/',
+ 'info_dict': {
+ 'id': '359649331226507',
+ 'ext': 'mp4',
+ 'title': '#ESLOne VoD - Birmingham Finals Day#1 Fnatic vs. @Evil Geniuses',
+ 'uploader': 'ESL One Dota 2',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ urls = []
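+ # Plain <iframe> embeds pointing at /video/embed or /plugins/video.php.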
+ for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://www\.facebook\.com/(?:video/embed|plugins/video\.php).+?)\1',
+ webpage):
+ urls.append(mobj.group('url'))
+ # Facebook API embed
+ # see https://developers.facebook.com/docs/plugins/embedded-video-player
+ for mobj in re.finditer(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\bfb-(?:video|post)\b[^\'"]*(?P=q1)[^>]+
+ data-href=(?P<q2>[\'"])(?P<url>(?:https?:)?//(?:www\.)?facebook.com/.+?)(?P=q2)''', webpage):
+ urls.append(mobj.group('url'))
+ return urls
+
+ def _login(self):
+ useremail, password = self._get_login_info()
+ if useremail is None:
+ return
+
+ login_page_req = sanitized_Request(self._LOGIN_URL)
+ self._set_cookie('facebook.com', 'locale', 'en_US')
+ login_page = self._download_webpage(login_page_req, None,
+ note='Downloading login page',
+ errnote='Unable to download login page')
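+ # The login form carries hidden tokens ("lsd" and "lgnrnd") that must be
+ # echoed back together with the credentials.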
+ lsd = self._search_regex(
+ r'<input type="hidden" name="lsd" value="([^"]*)"',
+ login_page, 'lsd')
+ lgnrnd = self._search_regex(r'name="lgnrnd" value="([^"]*?)"', login_page, 'lgnrnd')
+
+ login_form = {
+ 'email': useremail,
+ 'pass': password,
+ 'lsd': lsd,
+ 'lgnrnd': lgnrnd,
+ 'next': 'http://facebook.com/home.php',
+ 'default_persistent': '0',
+ 'legacy_return': '1',
+ 'timezone': '-60',
+ 'trynum': '1',
+ }
+ request = sanitized_Request(self._LOGIN_URL, urlencode_postdata(login_form))
+ request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ try:
+ login_results = self._download_webpage(request, None,
+ note='Logging in', errnote='unable to fetch login page')
+ if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+class=(["\']).*?login_error_box.*?\1[^>]*><div[^>]*>.*?</div><div[^>]*>(?P<error>.+?)</div>',
+ login_results, 'login error', default=None, group='error')
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ self._downloader.report_warning('unable to log in: bad username/password, or exceeded login rate limit (~3/min). Check credentials or wait.')
+ return
+
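+ # A checkpoint page may follow a successful login; answer it with
+ # "dont_save" using the fb_dtsg/h tokens embedded in the page.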
+ fb_dtsg = self._search_regex(
+ r'name="fb_dtsg" value="(.+?)"', login_results, 'fb_dtsg', default=None)
+ h = self._search_regex(
+ r'name="h"\s+(?:\w+="[^"]+"\s+)*?value="([^"]+)"', login_results, 'h', default=None)
+
+ if not fb_dtsg or not h:
+ return
+
+ check_form = {
+ 'fb_dtsg': fb_dtsg,
+ 'h': h,
+ 'name_action_selected': 'dont_save',
+ }
+ check_req = sanitized_Request(self._CHECKPOINT_URL, urlencode_postdata(check_form))
+ check_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ check_response = self._download_webpage(check_req, None,
+ note='Confirming login')
+ if re.search(r'id="checkpointSubmitButton"', check_response) is not None:
+ self._downloader.report_warning('Unable to confirm login: please log in using your browser and authorize the login.')
+ except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ self._downloader.report_warning('unable to log in: %s' % error_to_compat_str(err))
+ return
+
+ def _real_initialize(self):
+ self._login()
+
+ def _extract_from_url(self, url, video_id, fatal_if_no_video=True):
+ req = sanitized_Request(url)
+ req.add_header('User-Agent', self._CHROME_USER_AGENT)
+ webpage = self._download_webpage(req, video_id)
+
+ video_data = None
+
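+ # Each "instances" entry holds the module name at item[1][0] and its
+ # payload at item[2][0]; pick the VideoConfig payload that carries a video_id.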
+ def extract_video_data(instances):
+ for item in instances:
+ if item[1][0] == 'VideoConfig':
+ video_item = item[2][0]
+ if video_item.get('video_id'):
+ return video_item['videoData']
+
+ server_js_data = self._parse_json(self._search_regex(
+ r'handleServerJS\(({.+})(?:\);|,")', webpage,
+ 'server js data', default='{}'), video_id, fatal=False)
+
+ if server_js_data:
+ video_data = extract_video_data(server_js_data.get('instances', []))
+
+ def extract_from_jsmods_instances(js_data):
+ if js_data:
+ return extract_video_data(try_get(
+ js_data, lambda x: x['jsmods']['instances'], list) or [])
+
+ if not video_data:
+ server_js_data = self._parse_json(
+ self._search_regex(
+ r'bigPipe\.onPageletArrive\(({.+?})\)\s*;\s*}\s*\)\s*,\s*["\']onPageletArrive\s+(?:pagelet_group_mall|permalink_video_pagelet|hyperfeed_story_id_\d+)',
+ webpage, 'js data', default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ video_data = extract_from_jsmods_instances(server_js_data)
+
+ if not video_data:
+ if not fatal_if_no_video:
+ return webpage, False
+ m_msg = re.search(r'class="[^"]*uiInterstitialContent[^"]*"><div>(.*?)</div>', webpage)
+ if m_msg is not None:
+ raise ExtractorError(
+ 'The video is not available, Facebook said: "%s"' % m_msg.group(1),
+ expected=True)
+ elif '>You must log in to continue' in webpage:
+ self.raise_login_required()
+
+ # Video info was not in the first response; do a secondary request
+ # using the tahoe-player-specific URL.
+ tahoe_data = self._download_webpage(
+ self._VIDEO_PAGE_TAHOE_TEMPLATE % video_id, video_id,
+ data=urlencode_postdata({
+ '__a': 1,
+ '__pc': self._search_regex(
+ r'pkg_cohort["\']\s*:\s*["\'](.+?)["\']', webpage,
+ 'pkg cohort', default='PHASED:DEFAULT'),
+ '__rev': self._search_regex(
+ r'client_revision["\']\s*:\s*(\d+),', webpage,
+ 'client revision', default='3944515'),
+ 'fb_dtsg': self._search_regex(
+ r'"DTSGInitialData"\s*,\s*\[\]\s*,\s*{\s*"token"\s*:\s*"([^"]+)"',
+ webpage, 'dtsg token', default=''),
+ }),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
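+ # The tahoe response is prefixed with Facebook's "for (;;);"
+ # anti-JSON-hijacking guard; parse only what follows it.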
+ tahoe_js_data = self._parse_json(
+ self._search_regex(
+ r'for\s+\(\s*;\s*;\s*\)\s*;(.+)', tahoe_data,
+ 'tahoe js data', default='{}'),
+ video_id, fatal=False)
+ video_data = extract_from_jsmods_instances(tahoe_js_data)
+
+ if not video_data:
+ raise ExtractorError('Cannot parse data')
+
+ subtitles = {}
+ formats = []
+ for f in video_data:
+ # Normalise each entry to a one-element list before reading its
+ # stream type, so both dict and list entries are handled safely.
+ if f and isinstance(f, dict):
+ f = [f]
+ if not f or not isinstance(f, list):
+ continue
+ format_id = f[0].get('stream_type')
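+ # Each stream entry may expose sd/hd progressive URLs (plus optional
+ # no-ratelimit variants); HD gets a boost while the 'progressive'
+ # stream type is ranked below the others.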
+ for quality in ('sd', 'hd'):
+ for src_type in ('src', 'src_no_ratelimit'):
+ src = f[0].get('%s_%s' % (quality, src_type))
+ if src:
+ preference = -10 if format_id == 'progressive' else 0
+ if quality == 'hd':
+ preference += 5
+ formats.append({
+ 'format_id': '%s_%s_%s' % (format_id, quality, src_type),
+ 'url': src,
+ 'preference': preference,
+ })
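+ # A DASH manifest may be inlined as URL-encoded XML alongside the
+ # direct URLs.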
+ dash_manifest = f[0].get('dash_manifest')
+ if dash_manifest:
+ formats.extend(self._parse_mpd_formats(
+ compat_etree_fromstring(compat_urllib_parse_unquote_plus(dash_manifest))))
+ subtitles_src = f[0].get('subtitles_src')
+ if subtitles_src:
+ subtitles.setdefault('en', []).append({'url': subtitles_src})
+ if not formats:
+ raise ExtractorError('Cannot find video formats')
+
+ # Downloads made with a browser User-Agent are rate-limited; work
+ # around this by using a non-browser User-Agent.
+ for f in formats:
+ f.setdefault('http_headers', {})['User-Agent'] = 'facebookexternalhit/1.1'
+
+ self._sort_formats(formats)
+
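+ # Title fallbacks: the uiHeaderTitle header, then the photo caption,
+ # then the description <meta>; long titles are trimmed to 80 characters
+ # and a generic "Facebook video #<id>" is used as a last resort.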
+ video_title = self._html_search_regex(
+ r'<h2\s+[^>]*class="uiHeaderTitle"[^>]*>([^<]*)</h2>', webpage,
+ 'title', default=None)
+ if not video_title:
+ video_title = self._html_search_regex(
+ r'(?s)<span class="fbPhotosPhotoCaption".*?id="fbPhotoPageCaption"><span class="hasCaption">(.*?)</span>',
+ webpage, 'alternative title', default=None)
+ if not video_title:
+ video_title = self._html_search_meta(
+ 'description', webpage, 'title', default=None)
+ if video_title:
+ video_title = limit_length(video_title, 80)
+ else:
+ video_title = 'Facebook video #%s' % video_id
+ uploader = clean_html(get_element_by_id(
+ 'fbPhotoPageAuthorName', webpage)) or self._search_regex(
+ r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader',
+ default=None) or self._og_search_title(webpage, fatal=False)
+ timestamp = int_or_none(self._search_regex(
+ r'<abbr[^>]+data-utime=["\'](\d+)', webpage,
+ 'timestamp', default=None))
+ thumbnail = self._html_search_meta(['og:image', 'twitter:image'], webpage)
+
+ view_count = parse_count(self._search_regex(
+ r'\bviewCount\s*:\s*["\']([\d,.]+)', webpage, 'view count',
+ default=None))
+
+ info_dict = {
+ 'id': video_id,
+ 'title': video_title,
+ 'formats': formats,
+ 'uploader': uploader,
+ 'timestamp': timestamp,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'subtitles': subtitles,
+ }
+
+ return webpage, info_dict
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ real_url = self._VIDEO_PAGE_TEMPLATE % video_id if url.startswith('facebook:') else url
+ webpage, info_dict = self._extract_from_url(real_url, video_id, fatal_if_no_video=False)
+
+ if info_dict:
+ return info_dict
+
+ if '/posts/' in url:
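+ # A post may embed several videos; return them as a playlist of
+ # facebook:<id> entries.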
+ video_id_json = self._search_regex(
+ r'(["\'])video_ids\1\s*:\s*(?P<ids>\[.+?\])', webpage, 'video ids', group='ids',
+ default='')
+ if video_id_json:
+ entries = [
+ self.url_result('facebook:%s' % vid, FacebookIE.ie_key())
+ for vid in self._parse_json(video_id_json, video_id)]
+ return self.playlist_result(entries, video_id)
+
+ # Single Video?
+ video_id = self._search_regex(r'video_id:\s*"([0-9]+)"', webpage, 'single video id')
+ return self.url_result('facebook:%s' % video_id, FacebookIE.ie_key())
+ else:
+ _, info_dict = self._extract_from_url(
+ self._VIDEO_PAGE_TEMPLATE % video_id,
+ video_id, fatal_if_no_video=True)
+ return info_dict
+
+
+class FacebookPluginsVideoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[\w-]+\.)?facebook\.com/plugins/video\.php\?.*?\bhref=(?P<id>https.+)'
+
+ _TESTS = [{
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fgov.sg%2Fvideos%2F10154383743583686%2F&show_text=0&width=560',
+ 'md5': '5954e92cdfe51fe5782ae9bda7058a07',
+ 'info_dict': {
+ 'id': '10154383743583686',
+ 'ext': 'mp4',
+ 'title': 'What to do during the haze?',
+ 'uploader': 'Gov.sg',
+ 'upload_date': '20160826',
+ 'timestamp': 1472184808,
+ },
+ 'add_ie': [FacebookIE.ie_key()],
+ }, {
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https%3A%2F%2Fwww.facebook.com%2Fvideo.php%3Fv%3D10204634152394104',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.facebook.com/plugins/video.php?href=https://www.facebook.com/gov.sg/videos/10154383743583686/&show_text=0&width=560',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
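+ # The href query parameter carries the percent-encoded URL of the
+ # wrapped video; delegate it to FacebookIE.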
+ return self.url_result(
+ compat_urllib_parse_unquote(self._match_id(url)),
+ FacebookIE.ie_key())
diff --git a/youtube_dl/extractor/faz.py b/youtube_dlc/extractor/faz.py
index 312ee2aee..312ee2aee 100644
--- a/youtube_dl/extractor/faz.py
+++ b/youtube_dlc/extractor/faz.py
diff --git a/youtube_dl/extractor/fc2.py b/youtube_dlc/extractor/fc2.py
index 435561147..435561147 100644
--- a/youtube_dl/extractor/fc2.py
+++ b/youtube_dlc/extractor/fc2.py
diff --git a/youtube_dl/extractor/fczenit.py b/youtube_dlc/extractor/fczenit.py
index 8db7c5963..8db7c5963 100644
--- a/youtube_dl/extractor/fczenit.py
+++ b/youtube_dlc/extractor/fczenit.py
diff --git a/youtube_dl/extractor/filmon.py b/youtube_dlc/extractor/filmon.py
index f775fe0ba..f775fe0ba 100644
--- a/youtube_dl/extractor/filmon.py
+++ b/youtube_dlc/extractor/filmon.py
diff --git a/youtube_dl/extractor/filmweb.py b/youtube_dlc/extractor/filmweb.py
index 56000bc5b..56000bc5b 100644
--- a/youtube_dl/extractor/filmweb.py
+++ b/youtube_dlc/extractor/filmweb.py
diff --git a/youtube_dl/extractor/firsttv.py b/youtube_dlc/extractor/firsttv.py
index 28617d83c..28617d83c 100644
--- a/youtube_dl/extractor/firsttv.py
+++ b/youtube_dlc/extractor/firsttv.py
diff --git a/youtube_dl/extractor/fivemin.py b/youtube_dlc/extractor/fivemin.py
index f3f876ecd..f3f876ecd 100644
--- a/youtube_dl/extractor/fivemin.py
+++ b/youtube_dlc/extractor/fivemin.py
diff --git a/youtube_dl/extractor/fivetv.py b/youtube_dlc/extractor/fivetv.py
index c4c0f1b3d..c4c0f1b3d 100644
--- a/youtube_dl/extractor/fivetv.py
+++ b/youtube_dlc/extractor/fivetv.py
diff --git a/youtube_dl/extractor/flickr.py b/youtube_dlc/extractor/flickr.py
index 9f166efd4..9f166efd4 100644
--- a/youtube_dl/extractor/flickr.py
+++ b/youtube_dlc/extractor/flickr.py
diff --git a/youtube_dl/extractor/folketinget.py b/youtube_dlc/extractor/folketinget.py
index b3df93f28..b3df93f28 100644
--- a/youtube_dl/extractor/folketinget.py
+++ b/youtube_dlc/extractor/folketinget.py
diff --git a/youtube_dl/extractor/footyroom.py b/youtube_dlc/extractor/footyroom.py
index 118325b6d..118325b6d 100644
--- a/youtube_dl/extractor/footyroom.py
+++ b/youtube_dlc/extractor/footyroom.py
diff --git a/youtube_dl/extractor/formula1.py b/youtube_dlc/extractor/formula1.py
index fecfc28ae..fecfc28ae 100644
--- a/youtube_dl/extractor/formula1.py
+++ b/youtube_dlc/extractor/formula1.py
diff --git a/youtube_dl/extractor/fourtube.py b/youtube_dlc/extractor/fourtube.py
index be4e81342..be4e81342 100644
--- a/youtube_dl/extractor/fourtube.py
+++ b/youtube_dlc/extractor/fourtube.py
diff --git a/youtube_dl/extractor/fox.py b/youtube_dlc/extractor/fox.py
index 04f4bdba6..04f4bdba6 100644
--- a/youtube_dl/extractor/fox.py
+++ b/youtube_dlc/extractor/fox.py
diff --git a/youtube_dlc/extractor/fox9.py b/youtube_dlc/extractor/fox9.py
new file mode 100644
index 000000000..91f8f7b8a
--- /dev/null
+++ b/youtube_dlc/extractor/fox9.py
@@ -0,0 +1,41 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class FOX9IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox9\.com/video/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
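+ # Delegate to the Anvato extractor via its internal
+ # anvato:<access-key>:<video-id> URL scheme.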
+ return self.url_result(
+ 'anvato:anvato_epfox_app_web_prod_b3373168e12f423f41504f207000188daf88251b:' + video_id,
+ 'Anvato', video_id)
+
+
+class FOX9NewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?fox9\.com/news/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.fox9.com/news/black-bear-in-tree-draws-crowd-in-downtown-duluth-minnesota',
+ 'md5': 'd6e1b2572c3bab8a849c9103615dd243',
+ 'info_dict': {
+ 'id': '314473',
+ 'ext': 'mp4',
+ 'title': 'Bear climbs tree in downtown Duluth',
+ 'description': 'md5:6a36bfb5073a411758a752455408ac90',
+ 'duration': 51,
+ 'timestamp': 1478123580,
+ 'upload_date': '20161102',
+ 'uploader': 'EPFOX',
+ 'categories': ['News', 'Sports'],
+ 'tags': ['news', 'video'],
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
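+ # News articles embed the player id as "anvatoId: '<digits>'"; reuse
+ # FOX9IE by rebuilding the /video/ URL.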
+ anvato_id = self._search_regex(
+ r'anvatoId\s*:\s*[\'"](\d+)', webpage, 'anvato id')
+ return self.url_result('https://www.fox9.com/video/' + anvato_id, 'FOX9')
diff --git a/youtube_dl/extractor/foxgay.py b/youtube_dlc/extractor/foxgay.py
index 512a10645..512a10645 100644
--- a/youtube_dl/extractor/foxgay.py
+++ b/youtube_dlc/extractor/foxgay.py
diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dlc/extractor/foxnews.py
index 63613cb85..63613cb85 100644
--- a/youtube_dl/extractor/foxnews.py
+++ b/youtube_dlc/extractor/foxnews.py
diff --git a/youtube_dl/extractor/foxsports.py b/youtube_dlc/extractor/foxsports.py
index 2b2cb6c6f..2b2cb6c6f 100644
--- a/youtube_dl/extractor/foxsports.py
+++ b/youtube_dlc/extractor/foxsports.py
diff --git a/youtube_dlc/extractor/franceculture.py b/youtube_dlc/extractor/franceculture.py
new file mode 100644
index 000000000..306b45fc9
--- /dev/null
+++ b/youtube_dlc/extractor/franceculture.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+)
+
+
+class FranceCultureIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?franceculture\.fr/emissions/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'http://www.franceculture.fr/emissions/carnet-nomade/rendez-vous-au-pays-des-geeks',
+ 'info_dict': {
+ 'id': 'rendez-vous-au-pays-des-geeks',
+ 'display_id': 'rendez-vous-au-pays-des-geeks',
+ 'ext': 'mp3',
+ 'title': 'Rendez-vous au pays des geeks',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20140301',
+ 'timestamp': 1393642916,
+ 'vcodec': 'none',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
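+ # The audio URL and its metadata are exposed as data-asset-* attributes
+ # on a <button> next to the title/player zone.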
+ video_data = extract_attributes(self._search_regex(
+ r'''(?sx)
+ (?:
+ </h1>|
+ <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>
+ ).*?
+ (<button[^>]+data-asset-source="[^"]+"[^>]+>)
+ ''',
+ webpage, 'video data'))
+
+ video_url = video_data['data-asset-source']
+ title = video_data.get('data-asset-title') or self._og_search_title(webpage)
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class="intro"[^>]*>.*?<h2>(.+?)</h2>',
+ webpage, 'description', default=None)
+ thumbnail = self._search_regex(
+ r'(?s)<figure[^>]+itemtype="https://schema.org/ImageObject"[^>]*>.*?<img[^>]+(?:data-dejavu-)?src="([^"]+)"',
+ webpage, 'thumbnail', fatal=False)
+ uploader = self._html_search_regex(
+ r'(?s)<span class="author">(.*?)</span>',
+ webpage, 'uploader', default=None)
+ ext = determine_ext(video_url.lower())
+
+ return {
+ 'id': display_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'ext': ext,
+ 'vcodec': 'none' if ext == 'mp3' else None,
+ 'uploader': uploader,
+ 'timestamp': int_or_none(video_data.get('data-asset-created-date')),
+ 'duration': int_or_none(video_data.get('data-duration')),
+ }
diff --git a/youtube_dl/extractor/franceinter.py b/youtube_dlc/extractor/franceinter.py
index 05806895c..05806895c 100644
--- a/youtube_dl/extractor/franceinter.py
+++ b/youtube_dlc/extractor/franceinter.py
diff --git a/youtube_dlc/extractor/francetv.py b/youtube_dlc/extractor/francetv.py
new file mode 100644
index 000000000..e340cddba
--- /dev/null
+++ b/youtube_dlc/extractor/francetv.py
@@ -0,0 +1,518 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ try_get,
+ url_or_none,
+)
+from .dailymotion import DailymotionIE
+
+
+class FranceTVBaseInfoExtractor(InfoExtractor):
+ def _make_url_result(self, video_or_full_id, catalog=None):
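+ # Build a francetv:<id>[@<catalog>] URL (see FranceTVIE._VALID_URL
+ # below) and hand it to FranceTVIE.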
+ full_id = 'francetv:%s' % video_or_full_id
+ if '@' not in video_or_full_id and catalog:
+ full_id += '@%s' % catalog
+ return self.url_result(
+ full_id, ie=FranceTVIE.ie_key(),
+ video_id=video_or_full_id.split('@')[0])
+
+
+class FranceTVIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://
+ sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\?
+ .*?\bidDiffusion=[^&]+|
+ (?:
+ https?://videos\.francetv\.fr/video/|
+ francetv:
+ )
+ (?P<id>[^@]+)(?:@(?P<catalog>.+))?
+ )
+ '''
+
+ _TESTS = [{
+ # without catalog
+ 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0',
+ 'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f',
+ 'info_dict': {
+ 'id': '162311093',
+ 'ext': 'mp4',
+ 'title': '13h15, le dimanche... - Les mystères de Jésus',
+ 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
+ 'timestamp': 1502623500,
+ 'upload_date': '20170813',
+ },
+ }, {
+ # with catalog
+ 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videos.francetv.fr/video/NI_657393@Regions',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:162311093',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_1004933@Zouzous',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_983319@Info-web',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_983319',
+ 'only_matching': True,
+ }, {
+ 'url': 'francetv:NI_657393@Regions',
+ 'only_matching': True,
+ }, {
+ # france-3 live
+ 'url': 'francetv:SIM_France3',
+ 'only_matching': True,
+ }]
+
+ def _extract_video(self, video_id, catalogue=None):
+ # Videos are identified by idDiffusion, so the catalogue part is
+ # optional. However, when provided, some extra formats may be
+ # returned, so we pass it along if available.
+ info = self._download_json(
+ 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/',
+ video_id, 'Downloading video JSON', query={
+ 'idDiffusion': video_id,
+ 'catalogue': catalogue or '',
+ })
+
+ if info.get('status') == 'NOK':
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, info['message']),
+ expected=True)
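+ # "geoblocage" lists the countries the video may be played from; the
+ # viewer's country comes from the edgescape geo service.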
+ allowed_countries = info['videos'][0].get('geoblocage')
+ if allowed_countries:
+ georestricted = True
+ geo_info = self._download_json(
+ 'http://geo.francetv.fr/ws/edgescape.json', video_id,
+ 'Downloading geo restriction info')
+ country = geo_info['reponse']['geo_info']['country_code']
+ if country not in allowed_countries:
+ raise ExtractorError(
+ 'The video is not available from your location',
+ expected=True)
+ else:
+ georestricted = False
+
+ def sign(manifest_url, manifest_id):
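+ # Try each signing host in turn to obtain a signed manifest URL;
+ # if neither responds, fall back to the unsigned URL.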
+ for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'):
+ signed_url = url_or_none(self._download_webpage(
+ 'https://%s/esi/TA' % host, video_id,
+ 'Downloading signed %s manifest URL' % manifest_id,
+ fatal=False, query={
+ 'url': manifest_url,
+ }))
+ if signed_url:
+ return signed_url
+ return manifest_url
+
+ is_live = None
+
+ formats = []
+ for video in info['videos']:
+ if video['statut'] != 'ONLINE':
+ continue
+ video_url = video['url']
+ if not video_url:
+ continue
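+ # Treat the stream as live if the API reports an open broadcast
+ # window ('plages_ouverture' with 'direct' set) or the URL points
+ # at live.francetv.fr.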
+ if is_live is None:
+ is_live = (try_get(
+ video, lambda x: x['plages_ouverture'][0]['direct'],
+ bool) is True) or '/live.francetv.fr/' in video_url
+ format_id = video['format']
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ if georestricted:
+ # See https://github.com/ytdl-org/youtube-dl/issues/3963
+ # m3u8 urls work fine
+ continue
+ formats.extend(self._extract_f4m_formats(
+ sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44',
+ video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ sign(video_url, format_id), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ fatal=False))
+ elif video_url.startswith('rtmp'):
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'rtmp-%s' % format_id,
+ 'ext': 'flv',
+ })
+ else:
+ if self._is_valid_url(video_url, video_id, format_id):
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ })
+ self._sort_formats(formats)
+
+ title = info['titre']
+ subtitle = info.get('sous_titre')
+ if subtitle:
+ title += ' - %s' % subtitle
+ title = title.strip()
+
+ subtitles = {}
+ subtitles_list = [{
+ 'url': subformat['url'],
+ 'ext': subformat.get('format'),
+ } for subformat in info.get('subtitles', []) if subformat.get('url')]
+ if subtitles_list:
+ subtitles['fr'] = subtitles_list
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': clean_html(info['synopsis']),
+ 'thumbnail': compat_urlparse.urljoin('http://pluzz.francetv.fr', info['image']),
+ 'duration': int_or_none(info.get('real_duration')) or parse_duration(info['duree']),
+ 'timestamp': int_or_none(info['diffusion']['timestamp']),
+ 'is_live': is_live,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ catalog = mobj.group('catalog')
+
+ if not video_id:
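+ # getInfosOeuvre webservice URLs carry the video ID and catalog
+ # in the query string rather than in the URL path.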
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = qs.get('idDiffusion', [None])[0]
+ catalog = qs.get('catalogue', [None])[0]
+ if not video_id:
+ raise ExtractorError('Invalid URL', expected=True)
+
+ return self._extract_video(video_id, catalog)
+
+
+class FranceTVSiteIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?france\.tv|mobile\.france\.tv)/(?:[^/]+/)*(?P<id>[^/]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html',
+ 'info_dict': {
+ 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1',
+ 'ext': 'mp4',
+ 'title': '13h15, le dimanche... - Les mystères de Jésus',
+ 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42',
+ 'timestamp': 1502623500,
+ 'upload_date': '20170813',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }, {
+ # france3
+ 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html',
+ 'only_matching': True,
+ }, {
+ # france4
+ 'url': 'https://www.france.tv/france-4/hero-corp/saison-1/134151-apres-le-calme.html',
+ 'only_matching': True,
+ }, {
+ # france5
+ 'url': 'https://www.france.tv/france-5/c-a-dire/saison-10/137013-c-a-dire.html',
+ 'only_matching': True,
+ }, {
+ # franceo
+ 'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html',
+ 'only_matching': True,
+ }, {
+ # france2 live
+ 'url': 'https://www.france.tv/france-2/direct.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/jeux-et-divertissements/divertissements/133965-le-web-contre-attaque.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://mobile.france.tv/france-5/c-dans-l-air/137347-emission-du-vendredi-12-mai-2017.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.france.tv/142749-rouge-sang.html',
+ 'only_matching': True,
+ }, {
+ # france-3 live
+ 'url': 'https://www.france.tv/france-3/direct.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ catalogue = None
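+ # Newer pages expose the video ID via a data-main-video attribute
+ # or a videoId variable; older pages only link to videos.francetv.fr,
+ # in which case the catalogue is embedded in the link itself.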
+ video_id = self._search_regex(
+ r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'video id', default=None, group='id')
+
+ if not video_id:
+ video_id, catalogue = self._html_search_regex(
+ r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"',
+ webpage, 'video ID').split('@')
+
+ return self._make_url_result(video_id, catalogue)
+
+
+class FranceTVEmbedIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P<id>[^&]+)'
+
+ _TESTS = [{
+ 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961',
+ 'info_dict': {
+ 'id': 'NI_983319',
+ 'ext': 'mp4',
+ 'title': 'Le Pen Reims',
+ 'upload_date': '20170505',
+ 'timestamp': 1493981780,
+ 'duration': 16,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
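+ # The embed key from the URL resolves to the actual video ID (and
+ # optional catalog) via the embed webservice.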
+ video = self._download_json(
+ 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id,
+ video_id)
+
+ return self._make_url_result(video['video_id'], video.get('catalog'))
+
+
+class FranceTVInfoIE(FranceTVBaseInfoExtractor):
+ IE_NAME = 'francetvinfo.fr'
+ _VALID_URL = r'https?://(?:www|mobile|france3-regions)\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&.]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-jeudi-22-aout-2019_3561461.html',
+ 'info_dict': {
+ 'id': 'd12458ee-5062-48fe-bfdd-a30d6a01b793',
+ 'ext': 'mp4',
+ 'title': 'Soir 3',
+ 'upload_date': '20190822',
+ 'timestamp': 1566510900,
+ 'description': 'md5:72d167097237701d6e8452ff03b83c00',
+ 'subtitles': {
+ 'fr': 'mincount:2',
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }, {
+ 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://france3-regions.francetvinfo.fr/bretagne/cotes-d-armor/thalassa-echappee-breizh-ce-venredi-dans-les-cotes-d-armor-954961.html',
+ 'only_matching': True,
+ }, {
+ # Dailymotion embed
+ 'url': 'http://www.francetvinfo.fr/politique/notre-dame-des-landes/video-sur-france-inter-cecile-duflot-denonce-le-regard-meprisant-de-patrick-cohen_1520091.html',
+ 'md5': 'ee7f1828f25a648addc90cb2687b1f12',
+ 'info_dict': {
+ 'id': 'x4iiko0',
+ 'ext': 'mp4',
+ 'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen',
+ 'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016',
+ 'timestamp': 1467011958,
+ 'upload_date': '20160627',
+ 'uploader': 'France Inter',
+ 'uploader_id': 'x2q2ez',
+ },
+ 'add_ie': ['Dailymotion'],
+ }, {
+ 'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
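+ # Some articles embed Dailymotion players instead of francetv
+ # videos; hand those off to the Dailymotion extractor.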
+ dailymotion_urls = DailymotionIE._extract_urls(webpage)
+ if dailymotion_urls:
+ return self.playlist_result([
+ self.url_result(dailymotion_url, DailymotionIE.ie_key())
+ for dailymotion_url in dailymotion_urls])
+
+ video_id = self._search_regex(
+ (r'player\.load[^;]+src:\s*["\']([^"\']+)',
+ r'id-video=([^@]+@[^"]+)',
+ r'<a[^>]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"',
+ r'data-id="([^"]+)"'),
+ webpage, 'video id')
+
+ return self._make_url_result(video_id)
+
+
+class FranceTVInfoSportIE(FranceTVBaseInfoExtractor):
+ IE_NAME = 'sport.francetvinfo.fr'
+ _VALID_URL = r'https?://sport\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018',
+ 'info_dict': {
+ 'id': '6e49080e-3f45-11e8-b459-000d3a2439ea',
+ 'ext': 'mp4',
+ 'title': 'Retour sur les meilleurs moments de Pyeongchang 2018',
+ 'timestamp': 1523639962,
+ 'upload_date': '20180413',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
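+ # Sport pages expose the video ID in a data-video attribute; the
+ # catalog for this site is always 'Sport-web'.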
+ video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id')
+ return self._make_url_result(video_id, 'Sport-web')
+
+
+class GenerationWhatIE(InfoExtractor):
+ IE_NAME = 'france2.fr:generation-what'
+ _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms',
+ 'info_dict': {
+ 'id': 'wtvKYUG45iw',
+ 'ext': 'mp4',
+ 'title': 'Generation What - Garde à vous - FRA',
+ 'uploader': 'Generation What',
+ 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w',
+ 'upload_date': '20160411',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Youtube'],
+ }, {
+ 'url': 'http://generation-what.francetv.fr/europe/video/present-arms',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
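+ # The page stores the 11-character YouTube video ID in a
+ # window.videoURL JavaScript variable.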
+ youtube_id = self._search_regex(
+ r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';",
+ webpage, 'youtube id')
+
+ return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id)
+
+
+class CultureboxIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689',
+ 'info_dict': {
+ 'id': 'EV_134885',
+ 'ext': 'mp4',
+ 'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7',
+ 'description': 'md5:19c44af004b88219f4daa50fa9a351d4',
+ 'upload_date': '20180206',
+ 'timestamp': 1517945220,
+ 'duration': 5981,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [FranceTVIE.ie_key()],
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ if ">Ce live n'est plus disponible en replay<" in webpage:
+ raise ExtractorError(
+ 'Video %s is not available' % display_id, expected=True)
+
+ video_id, catalogue = self._search_regex(
+ r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]',
+ webpage, 'video id').split('@')
+
+ return self._make_url_result(video_id, catalogue)
+
+
+class FranceTVJeunesseIE(FranceTVBaseInfoExtractor):
+ _VALID_URL = r'(?P<url>https?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P<id>[^/?#&]+))'
+
+ _TESTS = [{
+ 'url': 'https://www.zouzous.fr/heros/simon',
+ 'info_dict': {
+ 'id': 'simon',
+ },
+ 'playlist_count': 9,
+ }, {
+ 'url': 'https://www.ludo.fr/heros/ninjago',
+ 'info_dict': {
+ 'id': 'ninjago',
+ },
+ 'playlist_count': 10,
+ }, {
+ 'url': 'https://www.zouzous.fr/heros/simon?abc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+
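+ # Each hero page exposes its episode list as JSON at
+ # <page URL>/playlist.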
+ playlist = self._download_json(
+ '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id)
+
+ if not playlist.get('count'):
+ raise ExtractorError(
+ '%s is not available' % playlist_id, expected=True)
+
+ entries = []
+ for item in playlist['items']:
+ identity = item.get('identity')
+ if identity and isinstance(identity, compat_str):
+ entries.append(self._make_url_result(identity))
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/extractor/freesound.py b/youtube_dlc/extractor/freesound.py
index 138b6bc58..138b6bc58 100644
--- a/youtube_dl/extractor/freesound.py
+++ b/youtube_dlc/extractor/freesound.py
diff --git a/youtube_dl/extractor/freespeech.py b/youtube_dlc/extractor/freespeech.py
index ea9c3e317..ea9c3e317 100644
--- a/youtube_dl/extractor/freespeech.py
+++ b/youtube_dlc/extractor/freespeech.py
diff --git a/youtube_dl/extractor/freshlive.py b/youtube_dlc/extractor/freshlive.py
index 72a845945..72a845945 100644
--- a/youtube_dl/extractor/freshlive.py
+++ b/youtube_dlc/extractor/freshlive.py
diff --git a/youtube_dl/extractor/frontendmasters.py b/youtube_dlc/extractor/frontendmasters.py
index f1db33fb1..f1db33fb1 100644
--- a/youtube_dl/extractor/frontendmasters.py
+++ b/youtube_dlc/extractor/frontendmasters.py
diff --git a/youtube_dl/extractor/funimation.py b/youtube_dlc/extractor/funimation.py
index 8bbedca26..8bbedca26 100644
--- a/youtube_dl/extractor/funimation.py
+++ b/youtube_dlc/extractor/funimation.py
diff --git a/youtube_dl/extractor/funk.py b/youtube_dlc/extractor/funk.py
index 81d1949fd..81d1949fd 100644
--- a/youtube_dl/extractor/funk.py
+++ b/youtube_dlc/extractor/funk.py
diff --git a/youtube_dl/extractor/fusion.py b/youtube_dlc/extractor/fusion.py
index a3f44b812..a3f44b812 100644
--- a/youtube_dl/extractor/fusion.py
+++ b/youtube_dlc/extractor/fusion.py
diff --git a/youtube_dl/extractor/fxnetworks.py b/youtube_dlc/extractor/fxnetworks.py
index 00e67426b..00e67426b 100644
--- a/youtube_dl/extractor/fxnetworks.py
+++ b/youtube_dlc/extractor/fxnetworks.py
diff --git a/youtube_dl/extractor/gaia.py b/youtube_dlc/extractor/gaia.py
index e9527758f..e9527758f 100644
--- a/youtube_dl/extractor/gaia.py
+++ b/youtube_dlc/extractor/gaia.py
diff --git a/youtube_dl/extractor/gameinformer.py b/youtube_dlc/extractor/gameinformer.py
index f1b96c172..f1b96c172 100644
--- a/youtube_dl/extractor/gameinformer.py
+++ b/youtube_dlc/extractor/gameinformer.py
diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dlc/extractor/gamespot.py
index 4236a5ed8..4236a5ed8 100644
--- a/youtube_dl/extractor/gamespot.py
+++ b/youtube_dlc/extractor/gamespot.py
diff --git a/youtube_dl/extractor/gamestar.py b/youtube_dlc/extractor/gamestar.py
index f00dab2f3..f00dab2f3 100644
--- a/youtube_dl/extractor/gamestar.py
+++ b/youtube_dlc/extractor/gamestar.py
diff --git a/youtube_dl/extractor/gaskrank.py b/youtube_dlc/extractor/gaskrank.py
index 1726a6704..1726a6704 100644
--- a/youtube_dl/extractor/gaskrank.py
+++ b/youtube_dlc/extractor/gaskrank.py
diff --git a/youtube_dl/extractor/gazeta.py b/youtube_dlc/extractor/gazeta.py
index 57c67a451..57c67a451 100644
--- a/youtube_dl/extractor/gazeta.py
+++ b/youtube_dlc/extractor/gazeta.py
diff --git a/youtube_dl/extractor/gdcvault.py b/youtube_dlc/extractor/gdcvault.py
index 2f555c1d4..2f555c1d4 100644
--- a/youtube_dl/extractor/gdcvault.py
+++ b/youtube_dlc/extractor/gdcvault.py
diff --git a/youtube_dlc/extractor/generic.py b/youtube_dlc/extractor/generic.py
new file mode 100644
index 000000000..aba06b328
--- /dev/null
+++ b/youtube_dlc/extractor/generic.py
@@ -0,0 +1,3459 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import os
+import re
+import sys
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..compat import (
+ compat_etree_fromstring,
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_urlparse,
+ compat_xml_parse_error,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ HEADRequest,
+ is_html,
+ js_to_json,
+ KNOWN_EXTENSIONS,
+ merge_dicts,
+ mimetype2ext,
+ orderedSet,
+ sanitized_Request,
+ smuggle_url,
+ unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
+ UnsupportedError,
+ xpath_text,
+)
+from .commonprotocols import RtmpIE
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .nexx import (
+ NexxIE,
+ NexxEmbedIE,
+)
+from .nbc import NBCSportsVPlayerIE
+from .ooyala import OoyalaIE
+from .rutv import RUTVIE
+from .tvc import TVCIE
+from .sportbox import SportBoxIE
+from .smotri import SmotriIE
+from .myvi import MyviIE
+from .condenast import CondeNastIE
+from .udn import UDNEmbedIE
+from .senateisvp import SenateISVPIE
+from .svt import SVTIE
+from .pornhub import PornHubIE
+from .xhamster import XHamsterEmbedIE
+from .tnaflix import TNAFlixNetworkEmbedIE
+from .drtuber import DrTuberIE
+from .redtube import RedTubeIE
+from .tube8 import Tube8IE
+from .mofosex import MofosexEmbedIE
+from .spankwire import SpankwireIE
+from .youporn import YouPornIE
+from .vimeo import VimeoIE
+from .dailymotion import DailymotionIE
+from .dailymail import DailyMailIE
+from .onionstudios import OnionStudiosIE
+from .viewlift import ViewLiftEmbedIE
+from .mtv import MTVServicesEmbeddedIE
+from .pladform import PladformIE
+from .videomore import VideomoreIE
+from .webcaster import WebcasterFeedIE
+from .googledrive import GoogleDriveIE
+from .jwplatform import JWPlatformIE
+from .digiteka import DigitekaIE
+from .arkena import ArkenaIE
+from .instagram import InstagramIE
+from .liveleak import LiveLeakIE
+from .threeqsdn import ThreeQSDNIE
+from .theplatform import ThePlatformIE
+from .kaltura import KalturaIE
+from .eagleplatform import EaglePlatformIE
+from .facebook import FacebookIE
+from .soundcloud import SoundcloudEmbedIE
+from .tunein import TuneInBaseIE
+from .vbox7 import Vbox7IE
+from .dbtv import DBTVIE
+from .piksel import PikselIE
+from .videa import VideaIE
+from .twentymin import TwentyMinutenIE
+from .ustream import UstreamIE
+from .videopress import VideoPressIE
+from .rutube import RutubeIE
+from .limelight import LimelightBaseIE
+from .anvato import AnvatoIE
+from .washingtonpost import WashingtonPostIE
+from .wistia import WistiaIE
+from .mediaset import MediasetIE
+from .joj import JojIE
+from .megaphone import MegaphoneIE
+from .vzaar import VzaarIE
+from .channel9 import Channel9IE
+from .vshare import VShareIE
+from .mediasite import MediasiteIE
+from .springboardplatform import SpringboardPlatformIE
+from .yapfiles import YapFilesIE
+from .vice import ViceIE
+from .xfileshare import XFileShareIE
+from .cloudflarestream import CloudflareStreamIE
+from .peertube import PeerTubeIE
+from .teachable import TeachableIE
+from .indavideo import IndavideoEmbedIE
+from .apa import APAIE
+from .foxnews import FoxNewsIE
+from .viqeo import ViqeoIE
+from .expressen import ExpressenIE
+from .zype import ZypeIE
+from .odnoklassniki import OdnoklassnikiIE
+from .kinja import KinjaEmbedIE
+
+
+class GenericIE(InfoExtractor):
+ IE_DESC = 'Generic downloader that works on some sites'
+ _VALID_URL = r'.*'
+ IE_NAME = 'generic'
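+ # Last-resort extractor: matches any URL and sniffs the response for
+ # direct media links, feeds, manifests and known third-party embeds.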
+ _TESTS = [
+ # Direct link to a video
+ {
+ 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
+ 'md5': '67d406c2bcb6af27fa886f31aa934bbe',
+ 'info_dict': {
+ 'id': 'trailer',
+ 'ext': 'mp4',
+ 'title': 'trailer',
+ 'upload_date': '20100513',
+ }
+ },
+ # Direct link to media delivered compressed (until Accept-Encoding is *)
+ {
+ 'url': 'http://calimero.tk/muzik/FictionJunction-Parallel_Hearts.flac',
+ 'md5': '128c42e68b13950268b648275386fc74',
+ 'info_dict': {
+ 'id': 'FictionJunction-Parallel_Hearts',
+ 'ext': 'flac',
+ 'title': 'FictionJunction-Parallel_Hearts',
+ 'upload_date': '20140522',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ],
+ 'skip': 'URL invalid',
+ },
+ # Direct download with broken HEAD
+ {
+ 'url': 'http://ai-radio.org:8000/radio.opus',
+ 'info_dict': {
+ 'id': 'radio',
+ 'ext': 'opus',
+ 'title': 'radio',
+ },
+ 'params': {
+ 'skip_download': True, # infinite live stream
+ },
+ 'expected_warnings': [
+ r'501.*Not Implemented',
+ r'400.*Bad Request',
+ ],
+ },
+ # Direct link with incorrect MIME type
+ {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'md5': '4ccbebe5f36706d85221f204d7eb5913',
+ 'info_dict': {
+ 'url': 'http://ftp.nluug.nl/video/nluug/2014-11-20_nj14/zaal-2/5_Lennart_Poettering_-_Systemd.webm',
+ 'id': '5_Lennart_Poettering_-_Systemd',
+ 'ext': 'webm',
+ 'title': '5_Lennart_Poettering_-_Systemd',
+ 'upload_date': '20141120',
+ },
+ 'expected_warnings': [
+ 'URL could be a direct video link, returning it as such.'
+ ]
+ },
+ # RSS feed
+ {
+ 'url': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+ 'info_dict': {
+ 'id': 'http://phihag.de/2014/youtube-dl/rss2.xml',
+ 'title': 'Zero Punctuation',
+ 'description': 're:.*groundbreaking video review series.*'
+ },
+ 'playlist_mincount': 11,
+ },
+ # RSS feed with enclosure
+ {
+ 'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
+ 'info_dict': {
+ 'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ 'ext': 'm4v',
+ 'upload_date': '20150228',
+ 'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
+ }
+ },
+ # RSS feed with enclosures and unsupported link URLs
+ {
+ 'url': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'info_dict': {
+ 'id': 'http://www.hellointernet.fm/podcast?format=rss',
+ 'description': 'CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
+ 'title': 'Hello Internet',
+ },
+ 'playlist_mincount': 100,
+ },
+ # SMIL from http://videolectures.net/promogram_igor_mekjavic_eng
+ {
+ 'url': 'http://videolectures.net/promogram_igor_mekjavic_eng/video/1/smil.xml',
+ 'info_dict': {
+ 'id': 'smil',
+ 'ext': 'mp4',
+ 'title': 'Automatics, robotics and biocybernetics',
+ 'description': 'md5:815fc1deb6b3a2bff99de2d5325be482',
+ 'upload_date': '20130627',
+ 'formats': 'mincount:16',
+ 'subtitles': 'mincount:1',
+ },
+ 'params': {
+ 'force_generic_extractor': True,
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://www1.wdr.de/mediathek/video/livestream/index.html
+ {
+ 'url': 'http://metafilegenerator.de/WDR/WDR_FS/hds/hds.smil',
+ 'info_dict': {
+ 'id': 'hds',
+ 'ext': 'flv',
+ 'title': 'hds',
+ 'formats': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from https://www.restudy.dk/video/play/id/1637
+ {
+ 'url': 'https://www.restudy.dk/awsmedia/SmilDirectory/video_1637.xml',
+ 'info_dict': {
+ 'id': 'video_1637',
+ 'ext': 'flv',
+ 'title': 'video_1637',
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm
+ {
+ 'url': 'http://services.media.howstuffworks.com/videos/450221/smil-service.smil',
+ 'info_dict': {
+ 'id': 'smil-service',
+ 'ext': 'flv',
+ 'title': 'smil-service',
+ 'formats': 'mincount:1',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # SMIL from http://new.livestream.com/CoheedandCambria/WebsterHall/videos/4719370
+ {
+ 'url': 'http://api.new.livestream.com/accounts/1570303/events/1585861/videos/4719370.smil',
+ 'info_dict': {
+ 'id': '4719370',
+ 'ext': 'mp4',
+ 'title': '571de1fd-47bc-48db-abf9-238872a58d1f',
+ 'formats': 'mincount:3',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # XSPF playlist from http://www.telegraaf.nl/tv/nieuws/binnenland/24353229/__Tikibad_ontruimd_wegens_brand__.html
+ {
+ 'url': 'http://www.telegraaf.nl/xml/playlist/2015/8/7/mZlp2ctYIUEB.xspf',
+ 'info_dict': {
+ 'id': 'mZlp2ctYIUEB',
+ 'ext': 'mp4',
+ 'title': 'Tikibad ontruimd wegens brand',
+ 'description': 'md5:05ca046ff47b931f9b04855015e163a4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 33,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # MPD from http://dash-mse-test.appspot.com/media.html
+ {
+ 'url': 'http://yt-dash-mse-test.commondatastorage.googleapis.com/media/car-20120827-manifest.mpd',
+ 'md5': '4b57baab2e30d6eb3a6a09f0ba57ef53',
+ 'info_dict': {
+ 'id': 'car-20120827-manifest',
+ 'ext': 'mp4',
+ 'title': 'car-20120827-manifest',
+ 'formats': 'mincount:9',
+ 'upload_date': '20130904',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ },
+ },
+ # m3u8 served with Content-Type: audio/x-mpegURL; charset=utf-8
+ {
+ 'url': 'http://once.unicornmedia.com/now/master/playlist/bb0b18ba-64f5-4b1b-a29f-0ac252f06b68/77a785f3-5188-4806-b788-0893a61634ed/93677179-2d99-4ef4-9e17-fe70d49abfbf/content.m3u8',
+ 'info_dict': {
+ 'id': 'content',
+ 'ext': 'mp4',
+ 'title': 'content',
+ 'formats': 'mincount:8',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': 'video gone',
+ },
+ # m3u8 served with Content-Type: text/plain
+ {
+ 'url': 'http://www.nacentapps.com/m3u8/index.m3u8',
+ 'info_dict': {
+ 'id': 'index',
+ 'ext': 'mp4',
+ 'title': 'index',
+ 'upload_date': '20140720',
+ 'formats': 'mincount:11',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'skip': 'video gone',
+ },
+ # google redirect
+ {
+ 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
+ 'info_dict': {
+ 'id': 'cmQHVoWB5FY',
+ 'ext': 'mp4',
+ 'upload_date': '20130224',
+ 'uploader_id': 'TheVerge',
+ 'description': r're:^Chris Ziegler takes a look at the\.*',
+ 'uploader': 'The Verge',
+ 'title': 'First Firefox OS phones side-by-side',
+ },
+ 'params': {
+ 'skip_download': False,
+ }
+ },
+ {
+ # redirect in Refresh HTTP header
+ 'url': 'https://www.facebook.com/l.php?u=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DpO8h3EaFRdo&h=TAQHsoToz&enc=AZN16h-b6o4Zq9pZkCCdOLNKMN96BbGMNtcFwHSaazus4JHT_MFYkAA-WARTX2kvsCIdlAIyHZjl6d33ILIJU7Jzwk_K3mcenAXoAzBNoZDI_Q7EXGDJnIhrGkLXo_LJ_pAa2Jzbx17UHMd3jAs--6j2zaeto5w9RTn8T_1kKg3fdC5WPX9Dbb18vzH7YFX0eSJmoa6SP114rvlkw6pkS1-T&s=1',
+ 'info_dict': {
+ 'id': 'pO8h3EaFRdo',
+ 'ext': 'mp4',
+ 'title': 'Tripeo Boiler Room x Dekmantel Festival DJ Set',
+ 'description': 'md5:6294cc1af09c4049e0652b51a2df10d5',
+ 'upload_date': '20150917',
+ 'uploader_id': 'brtvofficial',
+ 'uploader': 'Boiler Room',
+ },
+ 'params': {
+ 'skip_download': False,
+ },
+ },
+ {
+ 'url': 'http://www.hodiho.fr/2013/02/regis-plante-sa-jeep.html',
+ 'md5': '85b90ccc9d73b4acd9138d3af4c27f89',
+ 'info_dict': {
+ 'id': '13601338388002',
+ 'ext': 'mp4',
+ 'uploader': 'www.hodiho.fr',
+ 'title': 'R\u00e9gis plante sa Jeep',
+ }
+ },
+ # bandcamp page with custom domain
+ {
+ 'add_ie': ['Bandcamp'],
+ 'url': 'http://bronyrock.com/track/the-pony-mash',
+ 'info_dict': {
+ 'id': '3235767654',
+ 'ext': 'mp3',
+ 'title': 'The Pony Mash',
+ 'uploader': 'M_Pallante',
+ },
+ 'skip': 'There is a limit of 200 free downloads / month for the test song',
+ },
+ {
+ # embedded brightcove video
+ # it also tests brightcove videos that need to set the 'Referer'
+ # in the http requests
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://www.bfmtv.com/video/bfmbusiness/cours-bourse/cours-bourse-l-analyse-technique-154522/',
+ 'info_dict': {
+ 'id': '2765128793001',
+ 'ext': 'mp4',
+ 'title': 'Le cours de bourse : l’analyse technique',
+ 'description': 'md5:7e9ad046e968cb2d1114004aba466fd9',
+ 'uploader': 'BFM BUSINESS',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # embedded with itemprop embedURL and video id spelled as `idVideo`
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://bfmbusiness.bfmtv.com/mediaplayer/chroniques/olivier-delamarche/',
+ 'info_dict': {
+ 'id': '5255628253001',
+ 'ext': 'mp4',
+ 'title': 'md5:37c519b1128915607601e75a87995fc0',
+ 'description': 'md5:37f7f888b434bb8f8cc8dbd4f7a4cf26',
+ 'uploader': 'BFM BUSINESS',
+ 'uploader_id': '876450612001',
+ 'timestamp': 1482255315,
+ 'upload_date': '20161220',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # https://github.com/ytdl-org/youtube-dl/issues/2253
+ 'url': 'http://bcove.me/i6nfkrc3',
+ 'md5': '0ba9446db037002366bab3b3eb30c88c',
+ 'info_dict': {
+ 'id': '3101154703001',
+ 'ext': 'mp4',
+ 'title': 'Still no power',
+ 'uploader': 'thestar.com',
+ 'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ 'skip': 'video gone',
+ },
+ {
+ 'url': 'http://www.championat.com/video/football/v/87/87499.html',
+ 'md5': 'fb973ecf6e4a78a67453647444222983',
+ 'info_dict': {
+ 'id': '3414141473001',
+ 'ext': 'mp4',
+ 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
+ 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
+ 'uploader': 'Championat',
+ },
+ },
+ {
+ # https://github.com/ytdl-org/youtube-dl/issues/3541
+ 'add_ie': ['BrightcoveLegacy'],
+ 'url': 'http://www.kijk.nl/sbs6/leermijvrouwenkennen/videos/jqMiXKAYan2S/aflevering-1',
+ 'info_dict': {
+ 'id': '3866516442001',
+ 'ext': 'mp4',
+ 'title': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'description': 'Leer mij vrouwen kennen: Aflevering 1',
+ 'uploader': 'SBS Broadcasting',
+ },
+ 'skip': 'Restricted to Netherlands',
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ },
+ {
+ # Brightcove video in <iframe>
+ 'url': 'http://www.un.org/chinese/News/story.asp?NewsID=27724',
+ 'md5': '36d74ef5e37c8b4a2ce92880d208b968',
+ 'info_dict': {
+ 'id': '5360463607001',
+ 'ext': 'mp4',
+ 'title': '叙利亚失明儿童在废墟上演唱《心跳》 呼吁获得正常童年生活',
+ 'description': '联合国儿童基金会中东和北非区域大使、作曲家扎德·迪拉尼(Zade Dirani)在3月15日叙利亚冲突爆发7周年纪念日之际发布了为叙利亚谱写的歌曲《心跳》(HEARTBEAT),为受到六年冲突影响的叙利亚儿童发出强烈呐喊,呼吁世界做出共同努力,使叙利亚儿童重新获得享有正常童年生活的权利。',
+ 'uploader': 'United Nations',
+ 'uploader_id': '1362235914001',
+ 'timestamp': 1489593889,
+ 'upload_date': '20170315',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ },
+ {
+ # Brightcove with alternative playerID key
+ 'url': 'http://www.nature.com/nmeth/journal/v9/n7/fig_tab/nmeth.2062_SV1.html',
+ 'info_dict': {
+ 'id': 'nmeth.2062_SV1',
+ 'title': 'Simultaneous multiview imaging of the Drosophila syncytial blastoderm : Quantitative high-speed imaging of entire developing embryos with simultaneous multiview light-sheet microscopy : Nature Methods : Nature Research',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '2228375078001',
+ 'ext': 'mp4',
+ 'title': 'nmeth.2062-sv1',
+ 'description': 'nmeth.2062-sv1',
+ 'timestamp': 1363357591,
+ 'upload_date': '20130315',
+ 'uploader': 'Nature Publishing Group',
+ 'uploader_id': '1964492299001',
+ },
+ }],
+ },
+ {
+ # Brightcove with UUID in videoPlayer
+ 'url': 'http://www8.hp.com/cn/zh/home.html',
+ 'info_dict': {
+ 'id': '5255815316001',
+ 'ext': 'mp4',
+ 'title': 'Sprocket Video - China',
+ 'description': 'Sprocket Video - China',
+ 'uploader': 'HP-Video Gallery',
+ 'timestamp': 1482263210,
+ 'upload_date': '20161220',
+ 'uploader_id': '1107601872001',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ 'skip': 'video rotates...weekly?',
+ },
+ {
+ # Brightcove:new type [2].
+ 'url': 'http://www.delawaresportszone.com/video-st-thomas-more-earns-first-trip-to-basketball-semis',
+ 'md5': '2b35148fcf48da41c9fb4591650784f3',
+ 'info_dict': {
+ 'id': '5348741021001',
+ 'ext': 'mp4',
+ 'upload_date': '20170306',
+ 'uploader_id': '4191638492001',
+ 'timestamp': 1488769918,
+ 'title': 'VIDEO: St. Thomas More earns first trip to basketball semis',
+ },
+ },
+ {
+ # Alternative brightcove <video> attributes
+ 'url': 'http://www.programme-tv.net/videos/extraits/81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche/',
+ 'info_dict': {
+ 'id': '81095-guillaume-canet-evoque-les-rumeurs-d-infidelite-de-marion-cotillard-avec-brad-pitt-dans-vivement-dimanche',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche, Extraits : toutes les vidéos avec Télé-Loisirs",
+ },
+ 'playlist': [{
+ 'md5': '732d22ba3d33f2f3fc253c39f8f36523',
+ 'info_dict': {
+ 'id': '5311302538001',
+ 'ext': 'mp4',
+ 'title': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche",
+ 'description': "Guillaume Canet évoque les rumeurs d'infidélité de Marion Cotillard avec Brad Pitt dans Vivement Dimanche (France 2, 5 février 2017)",
+ 'timestamp': 1486321708,
+ 'upload_date': '20170205',
+ 'uploader_id': '800000640001',
+ },
+ 'only_matching': True,
+ }],
+ },
+ # ooyala video
+ {
+ 'url': 'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
+ 'md5': '166dd577b433b4d4ebfee10b0824d8ff',
+ 'info_dict': {
+ 'id': 'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
+ 'ext': 'mp4',
+ 'title': '2cc213299525360.mov', # that's what we get
+ 'duration': 238.231,
+ },
+ 'add_ie': ['Ooyala'],
+ },
+ {
+ # ooyala video embedded with http://player.ooyala.com/iframe.js
+ 'url': 'http://www.macrumors.com/2015/07/24/steve-jobs-the-man-in-the-machine-first-trailer/',
+ 'info_dict': {
+ 'id': 'p0MGJndjoG5SOKqO_hZJuZFPB-Tr5VgB',
+ 'ext': 'mp4',
+ 'title': '"Steve Jobs: Man in the Machine" trailer',
+ 'description': 'The first trailer for the Alex Gibney documentary "Steve Jobs: Man in the Machine."',
+ 'duration': 135.427,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'movie expired',
+ },
+ # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
+ {
+ 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
+ 'info_dict': {
+ 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
+ 'ext': 'mp4',
+ 'title': 'Steampunk Fest Comes to Honesdale',
+ 'duration': 43.276,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # embed.ly video
+ {
+ 'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
+ 'info_dict': {
+ 'id': '9ODmcdjQcHQ',
+ 'ext': 'mp4',
+ 'title': 'Tested: Grinding Coffee at 2000 Frames Per Second',
+ 'upload_date': '20140225',
+ 'description': 'md5:06a40fbf30b220468f1e0957c0f558ff',
+ 'uploader': 'Tested',
+ 'uploader_id': 'testedcom',
+ },
+ # No need to test YoutubeIE here
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # funnyordie embed
+ {
+ 'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
+ 'info_dict': {
+ 'id': '18e820ec3f',
+ 'ext': 'mp4',
+ 'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
+ 'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
+ },
+ # HEAD requests lead to endless 301, while GET is OK
+ 'expected_warnings': ['301'],
+ },
+ # RUTV embed
+ {
+ 'url': 'http://www.rg.ru/2014/03/15/reg-dfo/anklav-anons.html',
+ 'info_dict': {
+ 'id': '776940',
+ 'ext': 'mp4',
+ 'title': 'Охотское море стало целиком российским',
+ 'description': 'md5:5ed62483b14663e2a95ebbe115eb8f43',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # TVC embed
+ {
+ 'url': 'http://sch1298sz.mskobr.ru/dou_edu/karamel_ki/filial_galleries/video/iframe_src_http_tvc_ru_video_iframe_id_55304_isplay_false_acc_video_id_channel_brand_id_11_show_episodes_episode_id_32307_frameb/',
+ 'info_dict': {
+ 'id': '55304',
+ 'ext': 'mp4',
+ 'title': 'Дошкольное воспитание',
+ },
+ },
+ # SportBox embed
+ {
+ 'url': 'http://www.vestifinance.ru/articles/25753',
+ 'info_dict': {
+ 'id': '25753',
+ 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '370908',
+ 'title': 'Госзаказ. День 3',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '370905',
+ 'title': 'Госзаказ. День 2',
+ 'ext': 'mp4',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '370902',
+ 'title': 'Госзаказ. День 1',
+ 'ext': 'mp4',
+ }
+ }],
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # Myvi.ru embed
+ {
+ 'url': 'http://www.kinomyvi.tv/news/detail/Pervij-dublirovannij-trejler--Uzhastikov-_nOw1',
+ 'info_dict': {
+ 'id': 'f4dafcad-ff21-423d-89b5-146cfd89fa1e',
+ 'ext': 'mp4',
+ 'title': 'Ужастики, русский трейлер (2015)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 153,
+ }
+ },
+ # XHamster embed
+ {
+ 'url': 'http://www.numisc.com/forum/showthread.php?11696-FM15-which-pumiscer-was-this-%28-vid-%29-%28-alfa-as-fuck-srx-%29&s=711f5db534502e22260dec8c5e2d66d8',
+ 'info_dict': {
+ 'id': 'showthread',
+ 'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
+ },
+ 'playlist_mincount': 7,
+ # This forum no longer allows <iframe> syntax;
+ # HTML tags are now displayed as-is
+ 'skip': 'No videos on this page',
+ },
+ # Embedded TED video
+ {
+ 'url': 'http://en.support.wordpress.com/videos/ted-talks/',
+ 'md5': '65fdff94098e4a607385a60c5177c638',
+ 'info_dict': {
+ 'id': '1969',
+ 'ext': 'mp4',
+ 'title': 'Hidden miracles of the natural world',
+ 'uploader': 'Louie Schwartzberg',
+ 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
+ }
+ },
+ # nowvideo embed hidden behind percent encoding
+ {
+ 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+ 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+ 'info_dict': {
+ 'id': '06e53103ca9aa',
+ 'ext': 'flv',
+ 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
+ 'description': 'No description',
+ },
+ },
+ # arte embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/20-03-14/x-enius-arte-10753389.html',
+ 'md5': '7653032cbb25bf6c80d80f217055fa43',
+ 'info_dict': {
+ 'id': '048195-004_PLUS7-F',
+ 'ext': 'flv',
+ 'title': 'X:enius',
+ 'description': 'md5:d5fdf32ef6613cdbfd516ae658abf168',
+ 'upload_date': '20140320',
+ },
+ 'params': {
+ 'skip_download': 'Requires rtmpdump'
+ },
+ 'skip': 'video gone',
+ },
+ # francetv embed
+ {
+ 'url': 'http://www.tsprod.com/replay-du-concert-alcaline-de-calogero',
+ 'info_dict': {
+ 'id': 'EV_30231',
+ 'ext': 'mp4',
+ 'title': 'Alcaline, le concert avec Calogero',
+ 'description': 'md5:61f08036dcc8f47e9cfc33aed08ffaff',
+ 'upload_date': '20150226',
+ 'timestamp': 1424989860,
+ 'duration': 5400,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Forbidden'
+ ]
+ },
+ # Condé Nast embed
+ {
+ 'url': 'http://www.wired.com/2014/04/honda-asimo/',
+ 'md5': 'ba0dfe966fa007657bd1443ee672db0f',
+ 'info_dict': {
+ 'id': '53501be369702d3275860000',
+ 'ext': 'mp4',
+ 'title': 'Honda’s New Asimo Robot Is More Human Than Ever',
+ }
+ },
+ # Dailymotion embed
+ {
+ 'url': 'http://www.spi0n.com/zap-spi0n-com-n216/',
+ 'md5': '441aeeb82eb72c422c7f14ec533999cd',
+ 'info_dict': {
+ 'id': 'k2mm4bCdJ6CQ2i7c8o2',
+ 'ext': 'mp4',
+ 'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
+ 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
+ 'uploader': 'Spi0n',
+ 'uploader_id': 'xgditw',
+ 'upload_date': '20140425',
+ 'timestamp': 1398441542,
+ },
+ 'add_ie': ['Dailymotion'],
+ },
+ # DailyMail embed
+ {
+ 'url': 'http://www.bumm.sk/krimi/2017/07/05/biztonsagi-kamera-buktatta-le-az-agg-ferfit-utlegelo-apolot',
+ 'info_dict': {
+ 'id': '1495629',
+ 'ext': 'mp4',
+ 'title': 'Care worker punches elderly dementia patient in head 11 times',
+ 'description': 'md5:3a743dee84e57e48ec68bf67113199a5',
+ },
+ 'add_ie': ['DailyMail'],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # YouTube embed
+ {
+ 'url': 'http://www.badzine.de/ansicht/datum/2014/06/09/so-funktioniert-die-neue-englische-badminton-liga.html',
+ 'info_dict': {
+ 'id': 'FXRb4ykk4S0',
+ 'ext': 'mp4',
+ 'title': 'The NBL Auction 2014',
+ 'uploader': 'BADMINTON England',
+ 'uploader_id': 'BADMINTONEvents',
+ 'upload_date': '20140603',
+ 'description': 'md5:9ef128a69f1e262a700ed83edb163a73',
+ },
+ 'add_ie': ['Youtube'],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # MTVServices embed
+ {
+ 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
+ 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
+ 'info_dict': {
+ 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
+ 'ext': 'mp4',
+ 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
+ 'description': 'Two valets share their love for movie star Liam Neesons.',
+ 'timestamp': 1349922600,
+ 'upload_date': '20121011',
+ },
+ },
+ # YouTube embed via <data-embed-url="">
+ {
+ 'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
+ 'info_dict': {
+ 'id': '4vAffPZIT44',
+ 'ext': 'mp4',
+ 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
+ 'uploader': 'Gameloft',
+ 'uploader_id': 'gameloft',
+ 'upload_date': '20140828',
+ 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # YouTube <object> embed
+ {
+ 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/',
+ 'md5': '516718101ec834f74318df76259fb3cc',
+ 'info_dict': {
+ 'id': 'msN87y-iEx0',
+ 'ext': 'webm',
+ 'title': 'Feynman: Mirrors FUN TO IMAGINE 6',
+ 'upload_date': '20080526',
+ 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d',
+ 'uploader': 'Christopher Sykes',
+ 'uploader_id': 'ChristopherJSykes',
+ },
+ 'add_ie': ['Youtube'],
+ },
+ # Camtasia studio
+ {
+ 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
+ 'playlist': [{
+ 'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
+ 'ext': 'flv',
+ 'duration': 2235.90,
+ }
+ }, {
+ 'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
+ 'info_dict': {
+ 'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
+ 'ext': 'flv',
+ 'duration': 2235.93,
+ }
+ }],
+ 'info_dict': {
+ 'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+ }
+ },
+ # Flowplayer
+ {
+ 'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
+ 'md5': '9d65602bf31c6e20014319c7d07fba27',
+ 'info_dict': {
+ 'id': '5123ea6d5e5a7',
+ 'ext': 'mp4',
+ 'age_limit': 18,
+ 'uploader': 'www.handjobhub.com',
+ 'title': 'Busty Blonde Siri Tit Fuck While Wank at HandjobHub.com',
+ }
+ },
+ # Multiple brightcove videos
+ # https://github.com/ytdl-org/youtube-dl/issues/2283
+ {
+ 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+ 'info_dict': {
+ 'id': 'always-never',
+ 'title': 'Always / Never - The New Yorker',
+ },
+ 'playlist_count': 3,
+ 'params': {
+ 'extract_flat': False,
+ 'skip_download': True,
+ }
+ },
+ # MLB embed
+ {
+ 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
+ 'md5': '96f09a37e44da40dd083e12d9a683327',
+ 'info_dict': {
+ 'id': '33322633',
+ 'ext': 'mp4',
+ 'title': 'Ump changes call to ball',
+ 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
+ 'duration': 48,
+ 'timestamp': 1401537900,
+ 'upload_date': '20140531',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ # Wistia embed
+ {
+ 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
+ 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
+ 'info_dict': {
+ 'id': '6e2wtrbdaf',
+ 'ext': 'mov',
+ 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
+ 'description': 'a Paywall Videos video from Remilon',
+ 'duration': 644.072,
+ 'uploader': 'study.com',
+ 'timestamp': 1459678540,
+ 'upload_date': '20160403',
+ 'filesize': 24687186,
+ },
+ },
+ {
+ 'url': 'http://thoughtworks.wistia.com/medias/uxjb0lwrcz',
+ 'md5': 'baf49c2baa8a7de5f3fc145a8506dcd4',
+ 'info_dict': {
+ 'id': 'uxjb0lwrcz',
+ 'ext': 'mp4',
+ 'title': 'Conversation about Hexagonal Rails Part 1',
+ 'description': 'a Martin Fowler video from ThoughtWorks',
+ 'duration': 1715.0,
+ 'uploader': 'thoughtworks.wistia.com',
+ 'timestamp': 1401832161,
+ 'upload_date': '20140603',
+ },
+ },
+ # Wistia standard embed (async)
+ {
+ 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
+ 'info_dict': {
+ 'id': '807fafadvk',
+ 'ext': 'mp4',
+ 'title': 'Drip Brennan Dunn Workshop',
+ 'description': 'a JV Webinars video from getdrip-1',
+ 'duration': 4986.95,
+ 'timestamp': 1463607249,
+ 'upload_date': '20160518',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ # Soundcloud embed
+ {
+ 'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
+ 'info_dict': {
+ 'id': '174391317',
+ 'ext': 'mp3',
+ 'description': 'md5:ff867d6b555488ad3c52572bb33d432c',
+ 'uploader': 'Sophos Security',
+ 'title': 'Chet Chat 171 - Oct 29, 2014',
+ 'upload_date': '20141029',
+ }
+ },
+ # Soundcloud multiple embeds
+ {
+ 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
+ 'info_dict': {
+ 'id': '52809',
+ 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
+ },
+ 'playlist_mincount': 7,
+ },
+ # TuneIn station embed
+ {
+ 'url': 'http://radiocnrv.com/promouvoir-radio-cnrv/',
+ 'info_dict': {
+ 'id': '204146',
+ 'ext': 'mp3',
+ 'title': 'CNRV',
+ 'location': 'Paris, France',
+ 'is_live': True,
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
+ # Livestream embed
+ {
+ 'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
+ 'info_dict': {
+ 'id': '67864563',
+ 'ext': 'flv',
+ 'upload_date': '20141112',
+ 'title': 'Rosetta #CometLanding webcast HL 10',
+ }
+ },
+ # Another Livestream embed, without 'new.' in URL
+ {
+ 'url': 'https://www.freespeech.org/',
+ 'info_dict': {
+ 'id': '123537347',
+ 'ext': 'mp4',
+ 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
+ # LazyYT
+ {
+ 'url': 'https://skiplagged.com/',
+ 'info_dict': {
+ 'id': 'skiplagged',
+ 'title': 'Skiplagged: The smart way to find cheap flights',
+ },
+ 'playlist_mincount': 1,
+ 'add_ie': ['Youtube'],
+ },
+ # Cinchcast embed
+ {
+ 'url': 'http://undergroundwellness.com/podcasts/306-5-steps-to-permanent-gut-healing/',
+ 'info_dict': {
+ 'id': '7141703',
+ 'ext': 'mp3',
+ 'upload_date': '20141126',
+ 'title': 'Jack Tips: 5 Steps to Permanent Gut Healing',
+ }
+ },
+ # Cinerama player
+ {
+ 'url': 'http://www.abc.net.au/7.30/content/2015/s4164797.htm',
+ 'info_dict': {
+ 'id': '730m_DandD_1901_512k',
+ 'ext': 'mp4',
+ 'uploader': 'www.abc.net.au',
+ 'title': 'Game of Thrones with dice - Dungeons and Dragons fantasy role-playing game gets new life - 19/01/2015',
+ }
+ },
+ # embedded viddler video
+ {
+ 'url': 'http://deadspin.com/i-cant-stop-watching-john-wall-chop-the-nuggets-with-th-1681801597',
+ 'info_dict': {
+ 'id': '4d03aad9',
+ 'ext': 'mp4',
+ 'uploader': 'deadspin',
+ 'title': 'WALL-TO-GORTAT',
+ 'timestamp': 1422285291,
+ 'upload_date': '20150126',
+ },
+ 'add_ie': ['Viddler'],
+ },
+ # Libsyn embed
+ {
+ 'url': 'http://thedailyshow.cc.com/podcast/episodetwelve',
+ 'info_dict': {
+ 'id': '3377616',
+ 'ext': 'mp3',
+ 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
+ 'description': 'md5:601cb790edd05908957dae8aaa866465',
+ 'upload_date': '20150220',
+ },
+ 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
+ },
+ # jwplayer YouTube
+ {
+ 'url': 'http://media.nationalarchives.gov.uk/index.php/webinar-using-discovery-national-archives-online-catalogue/',
+ 'info_dict': {
+ 'id': 'Mrj4DVp2zeA',
+ 'ext': 'mp4',
+ 'upload_date': '20150212',
+ 'uploader': 'The National Archives UK',
+ 'description': 'md5:8078af856dca76edc42910b61273dbbf',
+ 'uploader_id': 'NationalArchives08',
+ 'title': 'Webinar: Using Discovery, The National Archives’ online catalogue',
+ },
+ },
+ # jwplayer rtmp
+ {
+ 'url': 'http://www.suffolk.edu/sjc/live.php',
+ 'info_dict': {
+ 'id': 'live',
+ 'ext': 'flv',
+ 'title': 'Massachusetts Supreme Judicial Court Oral Arguments',
+ 'uploader': 'www.suffolk.edu',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Only has video a few mornings per month, see http://www.suffolk.edu/sjc/',
+ },
+ # Complex jwplayer
+ {
+ 'url': 'http://www.indiedb.com/games/king-machine/videos',
+ 'info_dict': {
+ 'id': 'videos',
+ 'ext': 'mp4',
+ 'title': 'king machine trailer 1',
+ 'description': 'Browse King Machine videos & audio for sweet media. Your eyes will thank you.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ },
+ {
+ # JWPlayer config passed as variable
+ 'url': 'http://www.txxx.com/videos/3326530/ariele/',
+ 'info_dict': {
+ 'id': '3326530_hq',
+ 'ext': 'mp4',
+ 'title': 'ARIELE | Tube Cup',
+ 'uploader': 'www.txxx.com',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ {
+ # JWPlatform iframe
+ 'url': 'https://www.mediaite.com/tv/dem-senator-claims-gary-cohn-faked-a-bad-connection-during-trump-call-to-get-him-off-the-phone/',
+ 'md5': 'ca00a040364b5b439230e7ebfd02c4e9',
+ 'info_dict': {
+ 'id': 'O0c5JcKT',
+ 'ext': 'mp4',
+ 'upload_date': '20171122',
+ 'timestamp': 1511366290,
+ 'title': 'Dem Senator Claims Gary Cohn Faked a Bad Connection During Trump Call to Get Him Off the Phone',
+ },
+ 'add_ie': [JWPlatformIE.ie_key()],
+ },
+ {
+ # Video.js embed, multiple formats
+ 'url': 'http://ortcam.com/solidworks-урок-6-настройка-чертежа_33f9b7351.html',
+ 'info_dict': {
+ 'id': 'yygqldloqIk',
+ 'ext': 'mp4',
+ 'title': 'SolidWorks. Урок 6 Настройка чертежа',
+ 'description': 'md5:baf95267792646afdbf030e4d06b2ab3',
+ 'upload_date': '20130314',
+ 'uploader': 'PROстое3D',
+ 'uploader_id': 'PROstoe3D',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Video.js embed, single format
+ 'url': 'https://www.vooplayer.com/v3/watch/watch.php?v=NzgwNTg=',
+ 'info_dict': {
+ 'id': 'watch',
+ 'ext': 'mp4',
+ 'title': 'Step 1 - Good Foundation',
+ 'description': 'md5:d1e7ff33a29fc3eb1673d6c270d344f4',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # rtl.nl embed
+ {
+ 'url': 'http://www.rtlnieuws.nl/nieuws/buitenland/aanslagen-kopenhagen',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'aanslagen-kopenhagen',
+ 'title': 'Aanslagen Kopenhagen',
+ }
+ },
+ # Zapiks embed
+ {
+ 'url': 'http://www.skipass.com/news/116090-bon-appetit-s5ep3-baqueira-mi-cor.html',
+ 'info_dict': {
+ 'id': '118046',
+ 'ext': 'mp4',
+ 'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
+ }
+ },
+ # Kaltura embed (different embed code)
+ {
+ 'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
+ 'info_dict': {
+ 'id': '1_a52wc67y',
+ 'ext': 'flv',
+ 'upload_date': '20150127',
+ 'uploader_id': 'PremierMedia',
+ 'timestamp': int,
+ 'title': 'Os Guinness // Is It Fools Talk? // Unbelievable? Conference 2014',
+ },
+ },
+ # Kaltura embed with single quotes
+ {
+ 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
+ 'info_dict': {
+ 'id': '0_izeg5utt',
+ 'ext': 'mp4',
+ 'title': '35871',
+ 'timestamp': 1355743100,
+ 'upload_date': '20121217',
+ 'uploader_id': 'cplapp@learn360.com',
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura embedded via quoted entry_id
+ 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
+ 'info_dict': {
+ 'id': '0_utuok90b',
+ 'ext': 'mp4',
+ 'title': '06_matthew_brender_raj_dutt',
+ 'timestamp': 1466638791,
+ 'upload_date': '20160622',
+ },
+ 'add_ie': ['Kaltura'],
+ 'expected_warnings': [
+ 'Could not send HEAD request'
+ ],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
+ {
+ # Kaltura embedded, some fileExt broken (#11480)
+ 'url': 'http://www.cornell.edu/video/nima-arkani-hamed-standard-models-of-particle-physics',
+ 'info_dict': {
+ 'id': '1_sgtvehim',
+ 'ext': 'mp4',
+ 'title': 'Our "Standard Models" of particle physics and cosmology',
+ 'description': 'md5:67ea74807b8c4fea92a6f38d6d323861',
+ 'timestamp': 1321158993,
+ 'upload_date': '20111113',
+ 'uploader_id': 'kps1',
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura iframe embed
+ 'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
+ 'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
+ 'info_dict': {
+ 'id': '0_f2cfbpwy',
+ 'ext': 'mp4',
+ 'title': 'I. M. Pei: A Centennial Celebration',
+ 'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
+ 'upload_date': '20170403',
+ 'uploader_id': 'batchUser',
+ 'timestamp': 1491232186,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura iframe embed, more sophisticated
+ 'url': 'http://www.cns.nyu.edu/~eero/math-tools/Videos/lecture-05sep2017.html',
+ 'info_dict': {
+ 'id': '1_9gzouybz',
+ 'ext': 'mp4',
+ 'title': 'lecture-05sep2017',
+ 'description': 'md5:40f347d91fd4ba047e511c5321064b49',
+ 'upload_date': '20170913',
+ 'uploader_id': 'eps2',
+ 'timestamp': 1505340777,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # meta twitter:player
+ 'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
+ 'info_dict': {
+ 'id': '0_01b42zps',
+ 'ext': 'mp4',
+ 'title': 'Main Twerk (Video)',
+ 'upload_date': '20171208',
+ 'uploader_id': 'sebastian.salinas@thechive.com',
+ 'timestamp': 1512713057,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ # referrer protected EaglePlatform embed
+ {
+ 'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
+ 'info_dict': {
+ 'id': '582306',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3382,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # ClipYou (EaglePlatform) embed (custom URL)
+ {
+ 'url': 'http://muz-tv.ru/play/7129/',
+ # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
+ 'info_dict': {
+ 'id': '12820',
+ 'ext': 'mp4',
+ 'title': "'O Sole Mio",
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 216,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable.',
+ },
+ # Pladform embed
+ {
+ 'url': 'http://muz-tv.ru/kinozal/view/7400/',
+ 'info_dict': {
+ 'id': '100183293',
+ 'ext': 'mp4',
+ 'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
+ 'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 694,
+ 'age_limit': 0,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ },
+ # Playwire embed
+ {
+ 'url': 'http://www.cinemablend.com/new/First-Joe-Dirt-2-Trailer-Teaser-Stupid-Greatness-70874.html',
+ 'info_dict': {
+ 'id': '3519514',
+ 'ext': 'mp4',
+ 'title': 'Joe Dirt 2 Beautiful Loser Teaser Trailer',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'duration': 45.115,
+ },
+ },
+ # 5min embed
+ {
+ 'url': 'http://techcrunch.com/video/facebook-creates-on-this-day-crunch-report/518726732/',
+ 'md5': '4c6f127a30736b59b3e2c19234ee2bf7',
+ 'info_dict': {
+ 'id': '518726732',
+ 'ext': 'mp4',
+ 'title': 'Facebook Creates "On This Day" | Crunch Report',
+ 'description': 'Amazon updates Fire TV line, Tesla\'s Model X spotted in the wild',
+ 'timestamp': 1427237531,
+ 'uploader': 'Crunch Report',
+ 'upload_date': '20150324',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ # Crooks and Liars embed
+ {
+ 'url': 'http://crooksandliars.com/2015/04/fox-friends-says-protecting-atheists',
+ 'info_dict': {
+ 'id': '8RUoRhRi',
+ 'ext': 'mp4',
+ 'title': "Fox & Friends Says Protecting Atheists From Discrimination Is Anti-Christian!",
+ 'description': 'md5:e1a46ad1650e3a5ec7196d432799127f',
+ 'timestamp': 1428207000,
+ 'upload_date': '20150405',
+ 'uploader': 'Heather',
+ },
+ },
+ # Crooks and Liars external embed
+ {
+ 'url': 'http://theothermccain.com/2010/02/02/video-proves-that-bill-kristol-has-been-watching-glenn-beck/comment-page-1/',
+ 'info_dict': {
+ 'id': 'MTE3MjUtMzQ2MzA',
+ 'ext': 'mp4',
+ 'title': 'md5:5e3662a81a4014d24c250d76d41a08d5',
+ 'description': 'md5:9b8e9542d6c3c5de42d6451b7d780cec',
+ 'timestamp': 1265032391,
+ 'upload_date': '20100201',
+ 'uploader': 'Heather',
+ },
+ },
+ # NBC Sports vplayer embed
+ {
+ 'url': 'http://www.riderfans.com/forum/showthread.php?121827-Freeman&s=e98fa1ea6dc08e886b1678d35212494a',
+ 'info_dict': {
+ 'id': 'ln7x1qSThw4k',
+ 'ext': 'flv',
+ 'title': "PFT Live: New leader in the 'new-look' defense",
+ 'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ 'uploader': 'NBCU-SPORTS',
+ 'upload_date': '20140107',
+ 'timestamp': 1389118457,
+ },
+ 'skip': 'Invalid Page URL',
+ },
+ # NBC News embed
+ {
+ 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
+ 'md5': '1aa589c675898ae6d37a17913cf68d66',
+ 'info_dict': {
+ 'id': 'x_dtl_oa_LettermanliftPR_160608',
+ 'ext': 'mp4',
+ 'title': 'David Letterman: A Preview',
+ 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
+ 'upload_date': '20160609',
+ 'timestamp': 1465431544,
+ 'uploader': 'NBCU-NEWS',
+ },
+ },
+ # UDN embed
+ {
+ 'url': 'https://video.udn.com/news/300346',
+ 'md5': 'fd2060e988c326991037b9aff9df21a6',
+ 'info_dict': {
+ 'id': '300346',
+ 'ext': 'mp4',
+ 'title': '中一中男師變性 全校師生力挺',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to parse JSON Expecting value'],
+ },
+ # Brightcove URL in single quotes
+ {
+ 'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
+ 'md5': '4ae374f1f8b91c889c4b9203c8c752af',
+ 'info_dict': {
+ 'id': '4255764656001',
+ 'ext': 'mp4',
+ 'title': 'SN Presents: Russell Martin, World Citizen',
+ 'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
+ 'uploader': 'Rogers Sportsnet',
+ 'uploader_id': '1704050871',
+ 'upload_date': '20150525',
+ 'timestamp': 1432570283,
+ },
+ },
+ # Kinja embed
+ {
+ 'url': 'http://www.clickhole.com/video/dont-understand-bitcoin-man-will-mumble-explanatio-2537',
+ 'info_dict': {
+ 'id': '106351',
+ 'ext': 'mp4',
+ 'title': 'Don’t Understand Bitcoin? This Man Will Mumble An Explanation At You',
+ 'description': 'Migrated from OnionStudios',
+ 'thumbnail': r're:^https?://.*\.jpe?g$',
+ 'uploader': 'clickhole',
+ 'upload_date': '20150527',
+ 'timestamp': 1432744860,
+ }
+ },
+ # SnagFilms embed
+ {
+ 'url': 'http://whilewewatch.blogspot.ru/2012/06/whilewewatch-whilewewatch-gripping.html',
+ 'info_dict': {
+ 'id': '74849a00-85a9-11e1-9660-123139220831',
+ 'ext': 'mp4',
+ 'title': '#whilewewatch',
+ }
+ },
+ # AdobeTVVideo embed
+ {
+ 'url': 'https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners',
+ 'md5': '43662b577c018ad707a63766462b1e87',
+ 'info_dict': {
+ 'id': '2456',
+ 'ext': 'mp4',
+ 'title': 'New experience with Acrobat DC',
+ 'description': 'New experience with Acrobat DC',
+ 'duration': 248.667,
+ },
+ },
+ # BrightcoveInPageEmbed embed
+ {
+ 'url': 'http://www.geekandsundry.com/tabletop-bonus-wils-final-thoughts-on-dread/',
+ 'info_dict': {
+ 'id': '4238694884001',
+ 'ext': 'flv',
+ 'title': 'Tabletop: Dread, Last Thoughts',
+ 'description': 'Tabletop: Dread, Last Thoughts',
+ 'duration': 51690,
+ },
+ },
+ # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
+ # This video can't be played in browsers if Flash is disabled and the UA is set to iPhone, which is actually a false alarm
+ {
+ 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
+ 'info_dict': {
+ 'id': '4785848093001',
+ 'ext': 'mp4',
+ 'title': 'The Cardinal Pell Interview',
+ 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
+ 'uploader': 'GlobeCast Australia - GlobeStream',
+ 'uploader_id': '2733773828001',
+ 'upload_date': '20160304',
+ 'timestamp': 1457083087,
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ },
+ {
+ # Brightcove embed with whitespace around attribute names
+ 'url': 'http://www.stack.com/video/3167554373001/learn-to-hit-open-three-pointers-with-damian-lillard-s-baseline-drift-drill',
+ 'info_dict': {
+ 'id': '3167554373001',
+ 'ext': 'mp4',
+ 'title': "Learn to Hit Open Three-Pointers With Damian Lillard's Baseline Drift Drill",
+ 'description': 'md5:57bacb0e0f29349de4972bfda3191713',
+ 'uploader_id': '1079349493',
+ 'upload_date': '20140207',
+ 'timestamp': 1391810548,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # Another form of arte.tv embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
+ 'md5': '850bfe45417ddf221288c88a0cffe2e2',
+ 'info_dict': {
+ 'id': '030273-562_PLUS7-F',
+ 'ext': 'mp4',
+ 'title': 'ARTE Reportage - Nulle part, en France',
+ 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
+ 'upload_date': '20160409',
+ },
+ },
+ # LiveLeak embed
+ {
+ 'url': 'http://www.wykop.pl/link/3088787/',
+ 'md5': '7619da8c820e835bef21a1efa2a0fc71',
+ 'info_dict': {
+ 'id': '874_1459135191',
+ 'ext': 'mp4',
+ 'title': 'Man shows poor quality of new apartment building',
+ 'description': 'The wall is like a sand pile.',
+ 'uploader': 'Lake8737',
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
+ },
+ # Another LiveLeak embed pattern (#13336)
+ {
+ 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
+ 'info_dict': {
+ 'id': '2eb_1496309988',
+ 'ext': 'mp4',
+ 'title': 'Thief robs place where everyone was armed',
+ 'description': 'md5:694d73ee79e535953cf2488562288eee',
+ 'uploader': 'brazilwtf',
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
+ },
+ # Duplicated embedded video URLs
+ {
+ 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
+ 'info_dict': {
+ 'id': '149298443_480_16c25b74_2',
+ 'ext': 'mp4',
+ 'title': 'vs. Blue Orange Spring Game',
+ 'uploader': 'www.hudl.com',
+ },
+ },
+ # twitter:player:stream embed
+ {
+ 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
+ 'info_dict': {
+ 'id': 'master',
+ 'ext': 'mp4',
+ 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
+ 'uploader': 'www.rtl.be',
+ },
+ 'params': {
+ # m3u8 downloads
+ 'skip_download': True,
+ },
+ },
+ # twitter:player embed
+ {
+ 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
+ 'md5': 'a3e0df96369831de324f0778e126653c',
+ 'info_dict': {
+ 'id': '4909620399001',
+ 'ext': 'mp4',
+ 'title': 'What Do Black Holes Sound Like?',
+ 'description': 'what do black holes sound like',
+ 'upload_date': '20160524',
+ 'uploader_id': '29913724001',
+ 'timestamp': 1464107587,
+ 'uploader': 'TheAtlantic',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ },
+ # Facebook <iframe> embed
+ {
+ 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
+ 'md5': 'fbcde74f534176ecb015849146dd3aee',
+ 'info_dict': {
+ 'id': '599637780109885',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #599637780109885',
+ },
+ },
+ # Facebook <iframe> embed, plugin video
+ {
+ 'url': 'http://5pillarsuk.com/2017/06/07/tariq-ramadan-disagrees-with-pr-exercise-by-imams-refusing-funeral-prayers-for-london-attackers/',
+ 'info_dict': {
+ 'id': '1754168231264132',
+ 'ext': 'mp4',
+ 'title': 'About the Imams and Religious leaders refusing to perform funeral prayers for...',
+ 'uploader': 'Tariq Ramadan (official)',
+ 'timestamp': 1496758379,
+ 'upload_date': '20170606',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # Facebook API embed
+ {
+ 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
+ 'md5': 'a47372ee61b39a7b90287094d447d94e',
+ 'info_dict': {
+ 'id': '10153467542406923',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #10153467542406923',
+ },
+ },
+ # Wordpress "YouTube Video Importer" plugin
+ {
+ 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
+ 'md5': 'd16797741b560b485194eddda8121b48',
+ 'info_dict': {
+ 'id': 'HNTXWDXV9Is',
+ 'ext': 'mp4',
+ 'title': 'Blue Devils Drumline Stanford lot 2016',
+ 'upload_date': '20160627',
+ 'uploader_id': 'GENOCIDE8GENERAL10',
+ 'uploader': 'cylus cyrus',
+ },
+ },
+ {
+ # video stored on a custom Kaltura server
+ 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
+ 'md5': '537617d06e64dfed891fa1593c4b30cc',
+ 'info_dict': {
+ 'id': '0_1iotm5bh',
+ 'ext': 'mp4',
+ 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
+ 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
+ 'uploader_id': 'videos.expansion@el-mundo.net',
+ 'upload_date': '20150429',
+ 'timestamp': 1430303472,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # multiple Kaltura embeds, NSFW
+ 'url': 'https://www.quartier-rouge.be/prive/femmes/kamila-avec-video-jaime-sadomie.html',
+ 'info_dict': {
+ 'id': 'kamila-avec-video-jaime-sadomie',
+ 'title': "Kamila avec vídeo “J'aime sadomie”",
+ },
+ 'playlist_count': 8,
+ },
+ {
+ # Non-standard Vimeo embed
+ 'url': 'https://openclassrooms.com/courses/understanding-the-web',
+ 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
+ 'info_dict': {
+ 'id': '148867247',
+ 'ext': 'mp4',
+ 'title': 'Understanding the web - Teaser',
+ 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
+ 'upload_date': '20151214',
+ 'uploader': 'OpenClassrooms',
+ 'uploader_id': 'openclassrooms',
+ },
+ 'add_ie': ['Vimeo'],
+ },
+ {
+ # generic Vimeo embed that requires the original URL to be passed as Referer
+ 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
+ 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
+ 'info_dict': {
+ 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'description': 'Royalty free test video',
+ 'timestamp': 1432816365,
+ 'upload_date': '20150528',
+ 'is_live': False,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [ArkenaIE.ie_key()],
+ },
+ {
+ 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
+ 'info_dict': {
+ 'id': '1c7141f46c',
+ 'ext': 'mp4',
+ 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [Vbox7IE.ie_key()],
+ },
+ {
+ # DBTV embeds
+ 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
+ 'info_dict': {
+ 'id': '43254897',
+ 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
+ },
+ 'playlist_mincount': 3,
+ },
+ {
+ # Videa embeds
+ 'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
+ 'info_dict': {
+ 'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
+ 'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
+ },
+ 'playlist_mincount': 2,
+ },
+ {
+ # 20 minuten embed
+ 'url': 'http://www.20min.ch/schweiz/news/story/So-kommen-Sie-bei-Eis-und-Schnee-sicher-an-27032552',
+ 'info_dict': {
+ 'id': '523629',
+ 'ext': 'mp4',
+ 'title': 'So kommen Sie bei Eis und Schnee sicher an',
+ 'description': 'md5:117c212f64b25e3d95747e5276863f7d',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [TwentyMinutenIE.ie_key()],
+ },
+ {
+ # VideoPress embed
+ 'url': 'https://en.support.wordpress.com/videopress/',
+ 'info_dict': {
+ 'id': 'OcobLTqC',
+ 'ext': 'm4v',
+ 'title': 'IMG_5786',
+ 'timestamp': 1435711927,
+ 'upload_date': '20150701',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [VideoPressIE.ie_key()],
+ },
+ {
+ # Rutube embed
+ 'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
+ 'info_dict': {
+ 'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
+ 'ext': 'flv',
+ 'title': 'Магаззино: Казань 2',
+ 'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
+ 'uploader': 'Магаззино',
+ 'upload_date': '20170228',
+ 'uploader_id': '996642',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [RutubeIE.ie_key()],
+ },
+ {
+ # ThePlatform embed with whitespace in URLs
+ 'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
+ 'only_matching': True,
+ },
+ {
+ # Senate ISVP iframe https
+ 'url': 'https://www.hsgac.senate.gov/hearings/canadas-fast-track-refugee-plan-unanswered-questions-and-implications-for-us-national-security',
+ 'md5': 'fb8c70b0b515e5037981a2492099aab8',
+ 'info_dict': {
+ 'id': 'govtaff020316',
+ 'ext': 'mp4',
+ 'title': 'Integrated Senate Video Player',
+ },
+ 'add_ie': [SenateISVPIE.ie_key()],
+ },
+ {
+ # Limelight embeds (1 channel embed + 4 media embeds)
+ 'url': 'http://www.sedona.com/FacilitatorTraining2017',
+ 'info_dict': {
+ 'id': 'FacilitatorTraining2017',
+ 'title': 'Facilitator Training 2017',
+ },
+ 'playlist_mincount': 5,
+ },
+ {
+ # Limelight embed (LimelightPlayerUtil.embed)
+ 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+ 'info_dict': {
+ 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+ 'ext': 'mp4',
+ 'title': '07448641',
+ 'timestamp': 1499890639,
+ 'upload_date': '20170712',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['LimelightMedia'],
+ },
+ {
+ 'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
+ 'info_dict': {
+ 'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
+ 'title': 'Standoff with Walnut Creek murder suspect ends',
+ 'description': 'md5:3ccc48a60fc9441eeccfc9c469ebf788',
+ },
+ 'playlist_mincount': 4,
+ },
+ {
+ # WashingtonPost embed
+ 'url': 'http://www.vanityfair.com/hollywood/2017/04/donald-trump-tv-pitches',
+ 'info_dict': {
+ 'id': '8caf6e88-d0ec-11e5-90d3-34c2c42653ac',
+ 'ext': 'mp4',
+ 'title': "No one has seen the drama series based on Trump's life \u2014 until now",
+ 'description': 'Donald Trump wanted a weekly TV drama based on his life. It never aired. But The Washington Post recently obtained a scene from the pilot script — and enlisted actors.',
+ 'timestamp': 1455216756,
+ 'uploader': 'The Washington Post',
+ 'upload_date': '20160211',
+ },
+ 'add_ie': [WashingtonPostIE.ie_key()],
+ },
+ {
+ # Mediaset embed
+ 'url': 'http://www.tgcom24.mediaset.it/politica/serracchiani-voglio-vivere-in-una-societa-aperta-reazioni-sproporzionate-_3071354-201702a.shtml',
+ 'info_dict': {
+ 'id': '720642',
+ 'ext': 'mp4',
+ 'title': 'Serracchiani: "Voglio vivere in una società aperta, con tutela del patto di fiducia"',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [MediasetIE.ie_key()],
+ },
+ {
+ # JOJ.sk embeds
+ 'url': 'https://www.noviny.sk/slovensko/238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'info_dict': {
+ 'id': '238543-slovenskom-sa-prehnala-vlna-silnych-burok',
+ 'title': 'Slovenskom sa prehnala vlna silných búrok',
+ },
+ 'playlist_mincount': 5,
+ 'add_ie': [JojIE.ie_key()],
+ },
+ {
+ # AMP embed (see https://www.ampproject.org/docs/reference/components/amp-video)
+ 'url': 'https://tvrain.ru/amp/418921/',
+ 'md5': 'cc00413936695987e8de148b67d14f1d',
+ 'info_dict': {
+ 'id': '418921',
+ 'ext': 'mp4',
+ 'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
+ },
+ },
+ {
+ # vzaar embed
+ 'url': 'http://help.vzaar.com/article/165-embedding-video',
+ 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+ 'info_dict': {
+ 'id': '8707641',
+ 'ext': 'mp4',
+ 'title': 'Building A Business Online: Principal Chairs Q & A',
+ },
+ },
+ {
+ # multiple HTML5 videos on one page
+ 'url': 'https://www.paragon-software.com/home/rk-free/keyscenarios.html',
+ 'info_dict': {
+ 'id': 'keyscenarios',
+ 'title': 'Rescue Kit 14 Free Edition - Getting started',
+ },
+ 'playlist_count': 4,
+ },
+ {
+ # vshare embed
+ 'url': 'https://youtube-dlc-demo.neocities.org/vshare.html',
+ 'md5': '17b39f55b5497ae8b59f5fbce8e35886',
+ 'info_dict': {
+ 'id': '0f64ce6',
+ 'title': 'vl14062007715967',
+ 'ext': 'mp4',
+ }
+ },
+ {
+ 'url': 'http://www.heidelberg-laureate-forum.org/blog/video/lecture-friday-september-23-2016-sir-c-antony-r-hoare/',
+ 'md5': 'aecd089f55b1cb5a59032cb049d3a356',
+ 'info_dict': {
+ 'id': '90227f51a80c4d8f86c345a7fa62bd9a1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Friday, September 23, 2016 - Sir Tony Hoare',
+ 'description': 'md5:5a51db84a62def7b7054df2ade403c6c',
+ 'timestamp': 1474354800,
+ 'upload_date': '20160920',
+ }
+ },
+ {
+ 'url': 'http://www.kidzworld.com/article/30935-trolls-the-beat-goes-on-interview-skylar-astin-and-amanda-leighton',
+ 'info_dict': {
+ 'id': '1731611',
+ 'ext': 'mp4',
+ 'title': 'Official Trailer | TROLLS: THE BEAT GOES ON!',
+ 'description': 'md5:eb5f23826a027ba95277d105f248b825',
+ 'timestamp': 1516100691,
+ 'upload_date': '20180116',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [SpringboardPlatformIE.ie_key()],
+ },
+ {
+ 'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
+ 'info_dict': {
+ 'id': 'uPDB5I9wfp8',
+ 'ext': 'webm',
+ 'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
+ 'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
+ 'upload_date': '20160219',
+ 'uploader': 'Pocoyo - Português (BR)',
+ 'uploader_id': 'PocoyoBrazil',
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
+ 'info_dict': {
+ 'id': 'vMDE4NzI1Mjgt690b',
+ 'ext': 'mp4',
+ 'title': 'Котята',
+ },
+ 'add_ie': [YapFilesIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # CloudflareStream embed
+ 'url': 'https://www.cloudflare.com/products/cloudflare-stream/',
+ 'info_dict': {
+ 'id': '31c9291ab41fac05471db4e73aa11717',
+ 'ext': 'mp4',
+ 'title': '31c9291ab41fac05471db4e73aa11717',
+ },
+ 'add_ie': [CloudflareStreamIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # PeerTube embed
+ 'url': 'https://joinpeertube.org/fr/home/',
+ 'info_dict': {
+ 'id': 'home',
+ 'title': 'Reprenez le contrôle de vos vidéos ! #JoinPeertube',
+ },
+ 'playlist_count': 2,
+ },
+ {
+ # Indavideo embed
+ 'url': 'https://streetkitchen.hu/receptek/igy_kell_otthon_hamburgert_sutni/',
+ 'info_dict': {
+ 'id': '1693903',
+ 'ext': 'mp4',
+ 'title': 'Így kell otthon hamburgert sütni',
+ 'description': 'md5:f5a730ecf900a5c852e1e00540bbb0f7',
+ 'timestamp': 1426330212,
+ 'upload_date': '20150314',
+ 'uploader': 'StreetKitchen',
+ 'uploader_id': '546363',
+ },
+ 'add_ie': [IndavideoEmbedIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # APA embed via JWPlatform embed
+ 'url': 'http://www.vol.at/blue-man-group/5593454',
+ 'info_dict': {
+ 'id': 'jjv85FdZ',
+ 'ext': 'mp4',
+ 'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 254,
+ 'timestamp': 1519211149,
+ 'upload_date': '20180221',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://share-videos.se/auto/video/83645793?uid=13',
+ 'md5': 'b68d276de422ab07ee1d49388103f457',
+ 'info_dict': {
+ 'id': '83645793',
+ 'title': 'Lock up and get excited',
+ 'ext': 'mp4'
+ },
+ 'skip': 'TODO: fix nested playlists processing in tests',
+ },
+ {
+ # Viqeo embeds
+ 'url': 'https://viqeo.tv/',
+ 'info_dict': {
+ 'id': 'viqeo',
+ 'title': 'All-new video platform',
+ },
+ 'playlist_count': 6,
+ },
+ {
+ # Squarespace video embed, 2019-08-28
+ 'url': 'http://ootboxford.com',
+ 'info_dict': {
+ 'id': 'Tc7b_JGdZfw',
+ 'title': 'Out of the Blue, at Childish Things 10',
+ 'ext': 'mp4',
+ 'description': 'md5:a83d0026666cf5ee970f8bd1cfd69c7f',
+ 'uploader_id': 'helendouglashouse',
+ 'uploader': 'Helen & Douglas House',
+ 'upload_date': '20140328',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Zype embed
+ 'url': 'https://www.cookscountry.com/episode/554-smoky-barbecue-favorites',
+ 'info_dict': {
+ 'id': '5b400b834b32992a310622b9',
+ 'ext': 'mp4',
+ 'title': 'Smoky Barbecue Favorites',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ 'upload_date': '20170909',
+ 'timestamp': 1504915200,
+ },
+ 'add_ie': [ZypeIE.ie_key()],
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # videojs embed
+ 'url': 'https://video.sibnet.ru/shell.php?videoid=3422904',
+ 'info_dict': {
+ 'id': 'shell',
+ 'ext': 'mp4',
+ 'title': 'Доставщик пиццы спросил разрешения сыграть на фортепиано',
+ 'description': 'md5:89209cdc587dab1e4a090453dbaa2cb1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Failed to download MPD manifest'],
+ },
+ {
+ # DailyMotion embed with DM.player
+ 'url': 'https://www.beinsports.com/us/copa-del-rey/video/the-locker-room-valencia-beat-barca-in-copa/1203804',
+ 'info_dict': {
+ 'id': 'k6aKkGHd9FJs4mtJN39',
+ 'ext': 'mp4',
+ 'title': 'The Locker Room: Valencia Beat Barca In Copa del Rey Final',
+ 'description': 'This video is private.',
+ 'uploader_id': 'x1jf30l',
+ 'uploader': 'beIN SPORTS USA',
+ 'upload_date': '20190528',
+ 'timestamp': 1559062971,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ # {
+ # # TODO: find another test
+ # # http://schema.org/VideoObject
+ # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+ # 'md5': '888dcf08b7ea671381f00fab74692755',
+ # 'info_dict': {
+ # 'id': 'nyvTSJMKId',
+ # 'ext': 'mp4',
+ # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+ # 'description': '#love for cats.',
+ # 'timestamp': 1461244995,
+ # 'upload_date': '20160421',
+ # },
+ # 'params': {
+ # 'force_generic_extractor': True,
+ # },
+ # }
+ ]
+
+ def report_following_redirect(self, new_url):
+ """Report information extraction."""
+ self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
+
+ def _extract_rss(self, url, video_id, doc):
+ playlist_title = doc.find('./channel/title').text
+ playlist_desc_el = doc.find('./channel/description')
+ playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
+
+ entries = []
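+ # For each item, prefer the media URL from an <enclosure> tag and fall
+ # back to the item's <link> element when no enclosure provides one.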
+ for it in doc.findall('./channel/item'):
+ next_url = None
+ enclosure_nodes = it.findall('./enclosure')
+ for e in enclosure_nodes:
+ next_url = e.attrib.get('url')
+ if next_url:
+ break
+
+ if not next_url:
+ next_url = xpath_text(it, 'link', fatal=False)
+
+ if not next_url:
+ continue
+
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': next_url,
+ 'title': it.find('title').text,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': url,
+ 'title': playlist_title,
+ 'description': playlist_desc,
+ 'entries': entries,
+ }
+
+ def _extract_camtasia(self, url, video_id, webpage):
+ """ Returns None if no camtasia video can be found. """
+
+ camtasia_cfg = self._search_regex(
+ r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
+ webpage, 'camtasia configuration file', default=None)
+ if camtasia_cfg is None:
+ return None
+
+ title = self._html_search_meta('DC.title', webpage, fatal=True)
+
+ camtasia_url = compat_urlparse.urljoin(url, camtasia_cfg)
+ camtasia_cfg = self._download_xml(
+ camtasia_url, video_id,
+ note='Downloading camtasia configuration',
+ errnote='Failed to download camtasia configuration')
+ fileset_node = camtasia_cfg.find('./playlist/array/fileset')
+
+ entries = []
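+ # Each child of the fileset describes one media file; its <uri> is
+ # resolved against the page URL and its tag name is appended to the
+ # page title to form the entry title.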
+ for n in fileset_node:  # getchildren() was removed in Python 3.9
+ url_n = n.find('./uri')
+ if url_n is None:
+ continue
+
+ entries.append({
+ 'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
+ 'title': '%s - %s' % (title, n.tag),
+ 'url': compat_urlparse.urljoin(url, url_n.text),
+ 'duration': float_or_none(n.find('./duration').text),
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': title,
+ }
+
+ def _real_extract(self, url):
+ if url.startswith('//'):
+ return self.url_result(self.http_scheme() + url)
+
+ parsed_url = compat_urlparse.urlparse(url)
+ if not parsed_url.scheme:
+ default_search = self._downloader.params.get('default_search')
+ if default_search is None:
+ default_search = 'fixup_error'
+
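+ # 'fixup_error' only tries to fix up a protocol-less URL and errors out
+ # otherwise, while 'auto' and 'auto_warning' additionally fall back to a
+ # YouTube search.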
+ if default_search in ('auto', 'auto_warning', 'fixup_error'):
+ if re.match(r'^[^\s/]+\.[^\s/]+/', url):
+ self._downloader.report_warning('The URL doesn\'t specify the protocol, trying with http')
+ return self.url_result('http://' + url)
+ elif default_search != 'fixup_error':
+ if default_search == 'auto_warning':
+ if re.match(r'^(?:url|URL)$', url):
+ raise ExtractorError(
+ 'Invalid URL: %r . Call youtube-dlc like this: youtube-dlc -v "https://www.youtube.com/watch?v=BaW_jenozKc" ' % url,
+ expected=True)
+ else:
+ self._downloader.report_warning(
+ 'Falling back to youtube search for %s . Set --default-search "auto" to suppress this warning.' % url)
+ return self.url_result('ytsearch:' + url)
+
+ if default_search in ('error', 'fixup_error'):
+ raise ExtractorError(
+ '%r is not a valid URL. '
+ 'Set --default-search "ytsearch" (or run youtube-dlc "ytsearch:%s" ) to search YouTube'
+ % (url, url), expected=True)
+ else:
+ if ':' not in default_search:
+ default_search += ':'
+ return self.url_result(default_search + url)
+
+ url, smuggled_data = unsmuggle_url(url)
+ force_videoid = None
+ is_intentional = smuggled_data and smuggled_data.get('to_generic')
+ if smuggled_data and 'force_videoid' in smuggled_data:
+ force_videoid = smuggled_data['force_videoid']
+ video_id = force_videoid
+ else:
+ video_id = self._generic_id(url)
+
+ self.to_screen('%s: Requesting header' % video_id)
+
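+ # Send a HEAD request first: it is enough to detect redirects and to
+ # inspect Content-Type and Last-Modified without downloading the body.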
+ head_req = HEADRequest(url)
+ head_response = self._request_webpage(
+ head_req, video_id,
+ note=False, errnote='Could not send HEAD request to %s' % url,
+ fatal=False)
+
+ if head_response is not False:
+ # Check for redirect
+ new_url = head_response.geturl()
+ if url != new_url:
+ self.report_following_redirect(new_url)
+ if force_videoid:
+ new_url = smuggle_url(
+ new_url, {'force_videoid': force_videoid})
+ return self.url_result(new_url)
+
+ full_response = None
+ if head_response is False:
+ request = sanitized_Request(url)
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
+ head_response = full_response
+
+ info_dict = {
+ 'id': video_id,
+ 'title': self._generic_title(url),
+ 'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
+ }
+
+ # Check for direct link to a video
+ content_type = head_response.headers.get('Content-Type', '').lower()
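+ # Matches audio/* and video/* and, for application/*, only ogg and the
+ # various *mpegurl (HLS) subtypes.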
+ m = re.match(r'^(?P<type>audio|video|application(?=/(?:ogg$|(?:vnd\.apple\.|x-)?mpegurl)))/(?P<format_id>[^;\s]+)', content_type)
+ if m:
+ format_id = compat_str(m.group('format_id'))
+ if format_id.endswith('mpegurl'):
+ formats = self._extract_m3u8_formats(url, video_id, 'mp4')
+ elif format_id == 'f4m':
+ formats = self._extract_f4m_formats(url, video_id)
+ else:
+ formats = [{
+ 'format_id': format_id,
+ 'url': url,
+ 'vcodec': 'none' if m.group('type') == 'audio' else None
+ }]
+ info_dict['direct'] = True
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
+ return info_dict
+
+ if not self._downloader.params.get('test', False) and not is_intentional:
+ force = self._downloader.params.get('force_generic_extractor', False)
+ self._downloader.report_warning(
+ '%s on generic information extractor.' % ('Forcing' if force else 'Falling back'))
+
+ if not full_response:
+ request = sanitized_Request(url)
+ # Some webservers may serve compressed content of rather big size (e.g. gzipped flac),
+ # making it impossible to download only a chunk of the file (yet we need only 512kB to
+ # test whether it's HTML or not). With youtube-dlc's default Accept-Encoding, that
+ # would always result in downloading the whole file, which is not desirable.
+ # Therefore, for the extraction pass, we override Accept-Encoding to '*' so that raw
+ # bytes are accepted and only a chunk needs to be downloaded.
+ # It might be better to solve this by checking Content-Type for application/octet-stream
+ # after the HEAD request finishes, but it is unclear whether that can be relied upon.
+ request.add_header('Accept-Encoding', '*')
+ full_response = self._request_webpage(request, video_id)
+
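+ # 512 bytes are enough to tell an M3U playlist or an HTML page apart
+ # from a raw media file without reading a potentially huge response.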
+ first_bytes = full_response.read(512)
+
+ # Is it an M3U playlist?
+ if first_bytes.startswith(b'#EXTM3U'):
+ info_dict['formats'] = self._extract_m3u8_formats(url, video_id, 'mp4')
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+
+ # Maybe it's a direct link to a video?
+ # Be careful not to download the whole thing!
+ if not is_html(first_bytes):
+ self._downloader.report_warning(
+ 'URL could be a direct video link, returning it as such.')
+ info_dict.update({
+ 'direct': True,
+ 'url': url,
+ })
+ return info_dict
+
+ webpage = self._webpage_read_content(
+ full_response, url, video_id, prefix=first_bytes)
+
+ self.report_extraction(video_id)
+
+ # Is it an RSS feed, a SMIL file, an XSPF playlist or an MPD manifest?
+ try:
+ doc = compat_etree_fromstring(webpage.encode('utf-8'))
+ if doc.tag == 'rss':
+ return self._extract_rss(url, video_id, doc)
+ elif doc.tag == 'SmoothStreamingMedia':
+ info_dict['formats'] = self._parse_ism_formats(doc, url)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+ elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
+ smil = self._parse_smil(doc, url, video_id)
+ self._sort_formats(smil['formats'])
+ return smil
+ elif doc.tag == '{http://xspf.org/ns/0/}playlist':
+ return self.playlist_result(
+ self._parse_xspf(
+ doc, video_id, xspf_url=url,
+ xspf_base_url=full_response.geturl()),
+ video_id)
+ elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
+ info_dict['formats'] = self._parse_mpd_formats(
+ doc,
+ mpd_base_url=full_response.geturl().rpartition('/')[0],
+ mpd_url=url)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+ elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
+ info_dict['formats'] = self._parse_f4m_formats(doc, url, video_id)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
+ except compat_xml_parse_error:
+ pass
+
+ # Is it a Camtasia project?
+ camtasia_res = self._extract_camtasia(url, video_id, webpage)
+ if camtasia_res is not None:
+ return camtasia_res
+
+ # Sometimes an embedded video player is hidden behind percent encoding
+ # (e.g. https://github.com/ytdl-org/youtube-dl/issues/2448).
+ # Unescaping the whole page allows us to handle those cases in a generic way.
+ webpage = compat_urllib_parse_unquote(webpage)
+
+ # Unescape Squarespace embeds so they can be detected by the generic
+ # extractor, see https://github.com/ytdl-org/youtube-dl/issues/21294
+ webpage = re.sub(
+ r'<div[^>]+class=[^>]*?\bsqs-video-wrapper\b[^>]*>',
+ lambda x: unescapeHTML(x.group(0)), webpage)
+
+ # it's tempting to parse this further, but you would
+ # have to take into account all the variations like
+ # Video Title - Site Name
+ # Site Name | Video Title
+ # Video Title - Tagline | Site Name
+ # and so on and so forth; it's just not practical
+ video_title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
+ r'(?s)<title>(.*?)</title>', webpage, 'video title',
+ default='video')
+
+ # Try to detect age limit automatically
+ age_limit = self._rta_search(webpage)
+ # And then there are the jokers who advertise that they use RTA,
+ # but actually don't.
+ AGE_LIMIT_MARKERS = [
+ r'Proudly Labeled <a href="http://www\.rtalabel\.org/" title="Restricted to Adults">RTA</a>',
+ ]
+ if any(re.search(marker, webpage) for marker in AGE_LIMIT_MARKERS):
+ age_limit = 18
+
+ # video uploader is domain name
+ video_uploader = self._search_regex(
+ r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
+
+ video_description = self._og_search_description(webpage, default=None)
+ video_thumbnail = self._og_search_thumbnail(webpage, default=None)
+
+ info_dict.update({
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'age_limit': age_limit,
+ })
+
+ # Look for Brightcove Legacy Studio embeds
+ bc_urls = BrightcoveLegacyIE._extract_brightcove_urls(webpage)
+ if bc_urls:
+ entries = [{
+ '_type': 'url',
+ 'url': smuggle_url(bc_url, {'Referer': url}),
+ 'ie_key': 'BrightcoveLegacy'
+ } for bc_url in bc_urls]
+
+ return {
+ '_type': 'playlist',
+ 'title': video_title,
+ 'id': video_id,
+ 'entries': entries,
+ }
+
+ # Look for Brightcove New Studio embeds
+ bc_urls = BrightcoveNewIE._extract_urls(self, webpage)
+ if bc_urls:
+ return self.playlist_from_matches(
+ bc_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'referrer': url}),
+ ie='BrightcoveNew')
+
+ # Look for Nexx embeds
+ nexx_urls = NexxIE._extract_urls(webpage)
+ if nexx_urls:
+ return self.playlist_from_matches(nexx_urls, video_id, video_title, ie=NexxIE.ie_key())
+
+ # Look for Nexx iFrame embeds
+ nexx_embed_urls = NexxEmbedIE._extract_urls(webpage)
+ if nexx_embed_urls:
+ return self.playlist_from_matches(nexx_embed_urls, video_id, video_title, ie=NexxEmbedIE.ie_key())
+
+ # Look for ThePlatform embeds
+ tp_urls = ThePlatformIE._extract_urls(webpage)
+ if tp_urls:
+ return self.playlist_from_matches(tp_urls, video_id, video_title, ie='ThePlatform')
+
+ # Look for embedded rtl.nl player
+ matches = re.findall(
+ r'<iframe[^>]+?src="((?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)"',
+ webpage)
+ if matches:
+ return self.playlist_from_matches(matches, video_id, video_title, ie='RtlNl')
+
+ vimeo_urls = VimeoIE._extract_urls(url, webpage)
+ if vimeo_urls:
+ return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
+
+ vid_me_embed_url = self._search_regex(
+ r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
+ webpage, 'vid.me embed', default=None)
+ if vid_me_embed_url is not None:
+ return self.url_result(vid_me_embed_url, 'Vidme')
+
+ # Look for YouTube embeds
+ youtube_urls = YoutubeIE._extract_urls(webpage)
+ if youtube_urls:
+ return self.playlist_from_matches(
+ youtube_urls, video_id, video_title, ie=YoutubeIE.ie_key())
+
+ matches = DailymotionIE._extract_urls(webpage)
+ if matches:
+ return self.playlist_from_matches(matches, video_id, video_title)
+
+ # Look for embedded Dailymotion playlist player (#3822)
+ m = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
+ if m:
+ playlists = re.findall(
+ r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
+ if playlists:
+ return self.playlist_from_matches(
+ playlists, video_id, video_title, lambda p: '//dailymotion.com/playlist/%s' % p)
+
+ # Look for DailyMail embeds
+ dailymail_urls = DailyMailIE._extract_urls(webpage)
+ if dailymail_urls:
+ return self.playlist_from_matches(
+ dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key())
+
+ # Look for Teachable embeds, must be before Wistia
+ teachable_url = TeachableIE._extract_url(webpage, url)
+ if teachable_url:
+ return self.url_result(teachable_url)
+
+ # Look for embedded Wistia player
+ wistia_urls = WistiaIE._extract_urls(webpage)
+ if wistia_urls:
+ playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key())
+ for entry in playlist['entries']:
+ entry.update({
+ '_type': 'url_transparent',
+ 'uploader': video_uploader,
+ })
+ return playlist
+
+ # Look for SVT player
+ svt_url = SVTIE._extract_url(webpage)
+ if svt_url:
+ return self.url_result(svt_url, 'SVT')
+
+ # Look for Bandcamp pages with custom domain
+ mobj = re.search(r'<meta property="og:url"[^>]*?content="(.*?bandcamp\.com.*?)"', webpage)
+ if mobj is not None:
+ burl = unescapeHTML(mobj.group(1))
+ # Don't set the extractor because it can be a track URL or an album
+ return self.url_result(burl)
+
+ # Look for embedded Vevo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:cache\.)?vevo\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for embedded Viddler player
+ mobj = re.search(
+ r'<(?:iframe[^>]+?src|param[^>]+?value)=(["\'])(?P<url>(?:https?:)?//(?:www\.)?viddler\.com/(?:embed|player)/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for NYTimes player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Libsyn player
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//html5-player\.libsyn\.com/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for Ooyala videos
+ mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage)
+ or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage)
+ or re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage)
+ or re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
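+ # An embedToken found on the page is smuggled through to the Ooyala
+ # extractor (presumably used for authorized playback).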
+ if mobj is not None:
+ embed_token = self._search_regex(
+ r'embedToken[\'"]?\s*:\s*[\'"]([^\'"]+)',
+ webpage, 'ooyala embed token', default=None)
+ return OoyalaIE._build_url_result(smuggle_url(
+ mobj.group('ec'), {
+ 'domain': url,
+ 'embed_token': embed_token,
+ }))
+
+ # Look for multiple Ooyala embeds on SBN network websites
+ mobj = re.search(r'SBN\.VideoLinkset\.entryGroup\((\[.*?\])', webpage)
+ if mobj is not None:
+ embeds = self._parse_json(mobj.group(1), video_id, fatal=False)
+ if embeds:
+ return self.playlist_from_matches(
+ embeds, video_id, video_title,
+ getter=lambda v: OoyalaIE._url_for_embed_code(smuggle_url(v['provider_video_id'], {'domain': url})), ie='Ooyala')
+
+ # Look for Aparat videos
+ mobj = re.search(r'<iframe .*?src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Aparat')
+
+ # Look for MPORA videos
+ mobj = re.search(r'<iframe .*?src="(http://mpora\.(?:com|de)/videos/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Mpora')
+
+ # Look for embedded Facebook player
+ facebook_urls = FacebookIE._extract_urls(webpage)
+ if facebook_urls:
+ return self.playlist_from_matches(facebook_urls, video_id, video_title)
+
+ # Look for embedded VK player
+ mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'VK')
+
+ # Look for embedded Odnoklassniki player
+ odnoklassniki_url = OdnoklassnikiIE._extract_url(webpage)
+ if odnoklassniki_url:
+ return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
+
+ # Look for embedded ivi player
+ mobj = re.search(r'<embed[^>]+?src=(["\'])(?P<url>https?://(?:www\.)?ivi\.ru/video/player.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Ivi')
+
+ # Look for embedded Huffington Post player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'HuffPost')
+
+ # Look for embed.ly
+ mobj = re.search(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+ mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage)
+ if mobj is not None:
+ return self.url_result(compat_urllib_parse_unquote(mobj.group('url')))
+
+ # Look for funnyordie embed
+ matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage)
+ if matches:
+ return self.playlist_from_matches(
+ matches, video_id, video_title, getter=unescapeHTML, ie='FunnyOrDie')
+
+ # Look for BBC iPlayer embed
+ matches = re.findall(r'setPlaylist\("(https?://www\.bbc\.co\.uk/iplayer/[^/]+/[\da-z]{8})"\)', webpage)
+ if matches:
+ return self.playlist_from_matches(matches, video_id, video_title, ie='BBCCoUk')
+
+ # Look for embedded RUTV player
+ rutv_url = RUTVIE._extract_url(webpage)
+ if rutv_url:
+ return self.url_result(rutv_url, 'RUTV')
+
+ # Look for embedded TVC player
+ tvc_url = TVCIE._extract_url(webpage)
+ if tvc_url:
+ return self.url_result(tvc_url, 'TVC')
+
+ # Look for embedded SportBox player
+ sportbox_urls = SportBoxIE._extract_urls(webpage)
+ if sportbox_urls:
+ return self.playlist_from_matches(sportbox_urls, video_id, video_title, ie=SportBoxIE.ie_key())
+
+ # Look for embedded XHamster player
+ xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
+ if xhamster_urls:
+ return self.playlist_from_matches(xhamster_urls, video_id, video_title, ie='XHamsterEmbed')
+
+ # Look for embedded TNAFlixNetwork player
+ tnaflix_urls = TNAFlixNetworkEmbedIE._extract_urls(webpage)
+ if tnaflix_urls:
+ return self.playlist_from_matches(tnaflix_urls, video_id, video_title, ie=TNAFlixNetworkEmbedIE.ie_key())
+
+ # Look for embedded PornHub player
+ pornhub_urls = PornHubIE._extract_urls(webpage)
+ if pornhub_urls:
+ return self.playlist_from_matches(pornhub_urls, video_id, video_title, ie=PornHubIE.ie_key())
+
+ # Look for embedded DrTuber player
+ drtuber_urls = DrTuberIE._extract_urls(webpage)
+ if drtuber_urls:
+ return self.playlist_from_matches(drtuber_urls, video_id, video_title, ie=DrTuberIE.ie_key())
+
+ # Look for embedded RedTube player
+ redtube_urls = RedTubeIE._extract_urls(webpage)
+ if redtube_urls:
+ return self.playlist_from_matches(redtube_urls, video_id, video_title, ie=RedTubeIE.ie_key())
+
+ # Look for embedded Tube8 player
+ tube8_urls = Tube8IE._extract_urls(webpage)
+ if tube8_urls:
+ return self.playlist_from_matches(tube8_urls, video_id, video_title, ie=Tube8IE.ie_key())
+
+ # Look for embedded Mofosex player
+ mofosex_urls = MofosexEmbedIE._extract_urls(webpage)
+ if mofosex_urls:
+ return self.playlist_from_matches(mofosex_urls, video_id, video_title, ie=MofosexEmbedIE.ie_key())
+
+ # Look for embedded Spankwire player
+ spankwire_urls = SpankwireIE._extract_urls(webpage)
+ if spankwire_urls:
+ return self.playlist_from_matches(spankwire_urls, video_id, video_title, ie=SpankwireIE.ie_key())
+
+ # Look for embedded YouPorn player
+ youporn_urls = YouPornIE._extract_urls(webpage)
+ if youporn_urls:
+ return self.playlist_from_matches(youporn_urls, video_id, video_title, ie=YouPornIE.ie_key())
+
+ # Look for embedded Tvigle player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Tvigle')
+
+ # Look for embedded TED player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed(?:-ssl)?\.ted\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'TED')
+
+ # Look for embedded Ustream videos
+ ustream_url = UstreamIE._extract_url(webpage)
+ if ustream_url:
+ return self.url_result(ustream_url, UstreamIE.ie_key())
+
+ # Look for embedded arte.tv player
+ mobj = re.search(
+ r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+
+ # Look for embedded francetv player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?://)?embed\.francetv\.fr/\?ue=.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for embedded smotri.com player
+ smotri_url = SmotriIE._extract_url(webpage)
+ if smotri_url:
+ return self.url_result(smotri_url, 'Smotri')
+
+ # Look for embedded Myvi.ru player
+ myvi_url = MyviIE._extract_url(webpage)
+ if myvi_url:
+ return self.url_result(myvi_url)
+
+ # Look for embedded soundcloud player
+ soundcloud_urls = SoundcloudEmbedIE._extract_urls(webpage)
+ if soundcloud_urls:
+ return self.playlist_from_matches(soundcloud_urls, video_id, video_title, getter=unescapeHTML)
+
+ # Look for tunein player
+ tunein_urls = TuneInBaseIE._extract_urls(webpage)
+ if tunein_urls:
+ return self.playlist_from_matches(tunein_urls, video_id, video_title)
+
+ # Look for embedded mtvservices player
+ mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
+ if mtvservices_url:
+ return self.url_result(mtvservices_url, ie='MTVServicesEmbedded')
+
+ # Look for embedded yahoo player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:screen|movies)\.yahoo\.com/.+?\.html\?format=embed)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Yahoo')
+
+ # Look for embedded sbs.com.au player
+ mobj = re.search(
+ r'''(?x)
+ (?:
+ <meta\s+property="og:video"\s+content=|
+ <iframe[^>]+?src=
+ )
+ (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1''',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'SBS')
+
+ # Look for embedded Cinchcast player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://player\.cinchcast\.com/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Cinchcast')
+
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://m(?:lb)?\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+ webpage)
+ if not mobj:
+ mobj = re.search(
+ r'data-video-link=["\'](?P<url>http://m.mlb.com/video/[^"\']+)',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'MLB')
+
+ mobj = re.search(
+ r'<(?:iframe|script)[^>]+?src=(["\'])(?P<url>%s)\1' % CondeNastIE.EMBED_URL,
+ webpage)
+ if mobj is not None:
+ return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
+
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Livestream')
+
+ # Look for Zapiks embed
+ mobj = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://(?:www\.)?zapiks\.fr/index\.php\?.+?)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Zapiks')
+
+ # Look for Kaltura embeds
+ kaltura_urls = KalturaIE._extract_urls(webpage)
+ if kaltura_urls:
+ return self.playlist_from_matches(
+ kaltura_urls, video_id, video_title,
+ getter=lambda x: smuggle_url(x, {'source_url': url}),
+ ie=KalturaIE.ie_key())
+
+ # Look for EaglePlatform embeds
+ eagleplatform_url = EaglePlatformIE._extract_url(webpage)
+ if eagleplatform_url:
+ return self.url_result(smuggle_url(eagleplatform_url, {'referrer': url}), EaglePlatformIE.ie_key())
+
+ # Look for ClipYou (uses EaglePlatform) embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="https?://(?P<host>media\.clipyou\.ru)/index/player\?.*\brecord_id=(?P<id>\d+).*"', webpage)
+ if mobj is not None:
+ return self.url_result('eagleplatform:%(host)s:%(id)s' % mobj.groupdict(), 'EaglePlatform')
+
+ # Look for Pladform embeds
+ pladform_url = PladformIE._extract_url(webpage)
+ if pladform_url:
+ return self.url_result(pladform_url)
+
+ # Look for Videomore embeds
+ videomore_url = VideomoreIE._extract_url(webpage)
+ if videomore_url:
+ return self.url_result(videomore_url)
+
+ # Look for Webcaster embeds
+ webcaster_url = WebcasterFeedIE._extract_url(self, webpage)
+ if webcaster_url:
+ return self.url_result(webcaster_url, ie=WebcasterFeedIE.ie_key())
+
+ # Look for Playwire embeds
+ mobj = re.search(
+ r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for 5min embeds
+ mobj = re.search(
+ r'<meta[^>]+property="og:video"[^>]+content="https?://embed\.5min\.com/(?P<id>[0-9]+)/?', webpage)
+ if mobj is not None:
+ return self.url_result('5min:%s' % mobj.group('id'), 'FiveMin')
+
+ # Look for Crooks and Liars embeds
+ mobj = re.search(
+ r'<(?:iframe[^>]+src|param[^>]+value)=(["\'])(?P<url>(?:https?:)?//embed\.crooksandliars\.com/(?:embed|v)/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'))
+
+ # Look for NBC Sports VPlayer embeds
+ nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
+ if nbc_sports_url:
+ return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+
+ # Look for NBC News embeds
+ nbc_news_embed_url = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
+ if nbc_news_embed_url:
+ return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
+
+ # Look for Google Drive embeds
+ google_drive_url = GoogleDriveIE._extract_url(webpage)
+ if google_drive_url:
+ return self.url_result(google_drive_url, 'GoogleDrive')
+
+ # Look for UDN embeds
+ mobj = re.search(
+ r'<iframe[^>]+src="(?:https?:)?(?P<url>%s)"' % UDNEmbedIE._PROTOCOL_RELATIVE_VALID_URL, webpage)
+ if mobj is not None:
+ return self.url_result(
+ compat_urlparse.urljoin(url, mobj.group('url')), 'UDNEmbed')
+
+ # Look for Senate ISVP iframe
+ senate_isvp_url = SenateISVPIE._search_iframe_url(webpage)
+ if senate_isvp_url:
+ return self.url_result(senate_isvp_url, 'SenateISVP')
+
+ # Look for Kinja embeds
+ kinja_embed_urls = KinjaEmbedIE._extract_urls(webpage, url)
+ if kinja_embed_urls:
+ return self.playlist_from_matches(
+ kinja_embed_urls, video_id, video_title)
+
+ # Look for OnionStudios embeds
+ onionstudios_url = OnionStudiosIE._extract_url(webpage)
+ if onionstudios_url:
+ return self.url_result(onionstudios_url)
+
+ # Look for ViewLift embeds
+ viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
+ if viewlift_url:
+ return self.url_result(viewlift_url)
+
+ # Look for JWPlatform embeds
+ jwplatform_urls = JWPlatformIE._extract_urls(webpage)
+ if jwplatform_urls:
+ return self.playlist_from_matches(jwplatform_urls, video_id, video_title, ie=JWPlatformIE.ie_key())
+
+ # Look for Digiteka embeds
+ digiteka_url = DigitekaIE._extract_url(webpage)
+ if digiteka_url:
+ return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
+
+ # Look for Arkena embeds
+ arkena_url = ArkenaIE._extract_url(webpage)
+ if arkena_url:
+ return self.url_result(arkena_url, ArkenaIE.ie_key())
+
+ # Look for Piksel embeds
+ piksel_url = PikselIE._extract_url(webpage)
+ if piksel_url:
+ return self.url_result(piksel_url, PikselIE.ie_key())
+
+ # Look for Limelight embeds
+ limelight_urls = LimelightBaseIE._extract_urls(webpage, url)
+ if limelight_urls:
+ return self.playlist_result(
+ limelight_urls, video_id, video_title, video_description)
+
+ # Look for Anvato embeds
+ anvato_urls = AnvatoIE._extract_urls(self, webpage, video_id)
+ if anvato_urls:
+ return self.playlist_result(
+ anvato_urls, video_id, video_title, video_description)
+
+ # Look for AdobeTVVideo embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group(1))),
+ 'AdobeTVVideo')
+
+ # Look for Vine embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?vine\.co/v/[^/]+/embed/(?:simple|postcard))',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
+
+ # Look for VODPlatform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
+
+ # Look for Mangomolo embeds
+ mobj = re.search(
+ r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//
+ (?:
+ admin\.mangomolo\.com/analytics/index\.php/customers/embed|
+ player\.mangomolo\.com/v1
+ )/
+ (?:
+ video\?.*?\bid=(?P<video_id>\d+)|
+ (?:index|live)\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+ ).+?)\1''', webpage)
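+ # The embed URL carries either a video id (on demand) or a channel id
+ # (live); dispatch to MangomoloVideo or MangomoloLive accordingly.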
+ if mobj is not None:
+ info = {
+ '_type': 'url_transparent',
+ 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ }
+ video_id = mobj.group('video_id')
+ if video_id:
+ info.update({
+ 'ie_key': 'MangomoloVideo',
+ 'id': video_id,
+ })
+ else:
+ info.update({
+ 'ie_key': 'MangomoloLive',
+ 'id': mobj.group('channel_id'),
+ })
+ return info
+
+ # Look for Instagram embeds
+ instagram_embed_url = InstagramIE._extract_embed_url(webpage)
+ if instagram_embed_url is not None:
+ return self.url_result(
+ self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
+
+ # Look for LiveLeak embeds
+ liveleak_urls = LiveLeakIE._extract_urls(webpage)
+ if liveleak_urls:
+ return self.playlist_from_matches(liveleak_urls, video_id, video_title)
+
+ # Look for 3Q SDN embeds
+ threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
+ if threeqsdn_url:
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': ThreeQSDNIE.ie_key(),
+ 'url': self._proto_relative_url(threeqsdn_url),
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ }
+
+ # Look for VBOX7 embeds
+ vbox7_url = Vbox7IE._extract_url(webpage)
+ if vbox7_url:
+ return self.url_result(vbox7_url, Vbox7IE.ie_key())
+
+ # Look for DBTV embeds
+ dbtv_urls = DBTVIE._extract_urls(webpage)
+ if dbtv_urls:
+ return self.playlist_from_matches(dbtv_urls, video_id, video_title, ie=DBTVIE.ie_key())
+
+ # Look for Videa embeds
+ videa_urls = VideaIE._extract_urls(webpage)
+ if videa_urls:
+ return self.playlist_from_matches(videa_urls, video_id, video_title, ie=VideaIE.ie_key())
+
+ # Look for 20 minuten embeds
+ twentymin_urls = TwentyMinutenIE._extract_urls(webpage)
+ if twentymin_urls:
+ return self.playlist_from_matches(
+ twentymin_urls, video_id, video_title, ie=TwentyMinutenIE.ie_key())
+
+ # Look for VideoPress embeds
+ videopress_urls = VideoPressIE._extract_urls(webpage)
+ if videopress_urls:
+ return self.playlist_from_matches(
+ videopress_urls, video_id, video_title, ie=VideoPressIE.ie_key())
+
+ # Look for Rutube embeds
+ rutube_urls = RutubeIE._extract_urls(webpage)
+ if rutube_urls:
+ return self.playlist_from_matches(
+ rutube_urls, video_id, video_title, ie=RutubeIE.ie_key())
+
+ # Look for WashingtonPost embeds
+ wapo_urls = WashingtonPostIE._extract_urls(webpage)
+ if wapo_urls:
+ return self.playlist_from_matches(
+ wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
+
+ # Look for Mediaset embeds
+ mediaset_urls = MediasetIE._extract_urls(self, webpage)
+ if mediaset_urls:
+ return self.playlist_from_matches(
+ mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())
+
+ # Look for JOJ.sk embeds
+ joj_urls = JojIE._extract_urls(webpage)
+ if joj_urls:
+ return self.playlist_from_matches(
+ joj_urls, video_id, video_title, ie=JojIE.ie_key())
+
+ # Look for megaphone.fm embeds
+ mpfn_urls = MegaphoneIE._extract_urls(webpage)
+ if mpfn_urls:
+ return self.playlist_from_matches(
+ mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
+
+ # Look for vzaar embeds
+ vzaar_urls = VzaarIE._extract_urls(webpage)
+ if vzaar_urls:
+ return self.playlist_from_matches(
+ vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+
+ channel9_urls = Channel9IE._extract_urls(webpage)
+ if channel9_urls:
+ return self.playlist_from_matches(
+ channel9_urls, video_id, video_title, ie=Channel9IE.ie_key())
+
+ vshare_urls = VShareIE._extract_urls(webpage)
+ if vshare_urls:
+ return self.playlist_from_matches(
+ vshare_urls, video_id, video_title, ie=VShareIE.ie_key())
+
+ # Look for Mediasite embeds
+ mediasite_urls = MediasiteIE._extract_urls(webpage)
+ if mediasite_urls:
+ entries = [
+ self.url_result(smuggle_url(
+ compat_urlparse.urljoin(url, mediasite_url),
+ {'UrlReferrer': url}), ie=MediasiteIE.ie_key())
+ for mediasite_url in mediasite_urls]
+ return self.playlist_result(entries, video_id, video_title)
+
+ springboardplatform_urls = SpringboardPlatformIE._extract_urls(webpage)
+ if springboardplatform_urls:
+ return self.playlist_from_matches(
+ springboardplatform_urls, video_id, video_title,
+ ie=SpringboardPlatformIE.ie_key())
+
+ yapfiles_urls = YapFilesIE._extract_urls(webpage)
+ if yapfiles_urls:
+ return self.playlist_from_matches(
+ yapfiles_urls, video_id, video_title, ie=YapFilesIE.ie_key())
+
+ vice_urls = ViceIE._extract_urls(webpage)
+ if vice_urls:
+ return self.playlist_from_matches(
+ vice_urls, video_id, video_title, ie=ViceIE.ie_key())
+
+ xfileshare_urls = XFileShareIE._extract_urls(webpage)
+ if xfileshare_urls:
+ return self.playlist_from_matches(
+ xfileshare_urls, video_id, video_title, ie=XFileShareIE.ie_key())
+
+ cloudflarestream_urls = CloudflareStreamIE._extract_urls(webpage)
+ if cloudflarestream_urls:
+ return self.playlist_from_matches(
+ cloudflarestream_urls, video_id, video_title, ie=CloudflareStreamIE.ie_key())
+
+ peertube_urls = PeerTubeIE._extract_urls(webpage, url)
+ if peertube_urls:
+ return self.playlist_from_matches(
+ peertube_urls, video_id, video_title, ie=PeerTubeIE.ie_key())
+
+ indavideo_urls = IndavideoEmbedIE._extract_urls(webpage)
+ if indavideo_urls:
+ return self.playlist_from_matches(
+ indavideo_urls, video_id, video_title, ie=IndavideoEmbedIE.ie_key())
+
+ apa_urls = APAIE._extract_urls(webpage)
+ if apa_urls:
+ return self.playlist_from_matches(
+ apa_urls, video_id, video_title, ie=APAIE.ie_key())
+
+ foxnews_urls = FoxNewsIE._extract_urls(webpage)
+ if foxnews_urls:
+ return self.playlist_from_matches(
+ foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
+
+ sharevideos_urls = [sharevideos_mobj.group('url') for sharevideos_mobj in re.finditer(
+ r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
+ webpage)]
+ if sharevideos_urls:
+ return self.playlist_from_matches(
+ sharevideos_urls, video_id, video_title)
+
+ viqeo_urls = ViqeoIE._extract_urls(webpage)
+ if viqeo_urls:
+ return self.playlist_from_matches(
+ viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
+
+ expressen_urls = ExpressenIE._extract_urls(webpage)
+ if expressen_urls:
+ return self.playlist_from_matches(
+ expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
+
+ zype_urls = ZypeIE._extract_urls(webpage)
+ if zype_urls:
+ return self.playlist_from_matches(
+ zype_urls, video_id, video_title, ie=ZypeIE.ie_key())
+
+ # Look for HTML5 media
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+ if entries:
+ if len(entries) == 1:
+ entries[0].update({
+ 'id': video_id,
+ 'title': video_title,
+ })
+ else:
+ for num, entry in enumerate(entries, start=1):
+ entry.update({
+ 'id': '%s-%s' % (video_id, num),
+ 'title': '%s (%d)' % (video_title, num),
+ })
+ for entry in entries:
+ self._sort_formats(entry['formats'])
+ return self.playlist_result(entries, video_id, video_title)
+
+ jwplayer_data = self._find_jwplayer_data(
+ webpage, video_id, transform_source=js_to_json)
+ if jwplayer_data:
+ try:
+ info = self._parse_jwplayer_data(
+ jwplayer_data, video_id, require_title=False, base_url=url)
+ return merge_dicts(info, info_dict)
+ except ExtractorError:
+ # See https://github.com/ytdl-org/youtube-dl/pull/16735
+ pass
+
+ # Video.js embed
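+ # Matches player setups such as (hypothetical markup):
+ # videojs('player').src([
+ # {src: 'https://example.com/stream.m3u8', type: 'application/x-mpegurl'}
+ # ]);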
+ mobj = re.search(
+ r'(?s)\bvideojs\s*\(.+?\.src\s*\(\s*((?:\[.+?\]|{.+?}))\s*\)\s*;',
+ webpage)
+ if mobj is not None:
+ sources = self._parse_json(
+ mobj.group(1), video_id, transform_source=js_to_json,
+ fatal=False) or []
+ if not isinstance(sources, list):
+ sources = [sources]
+ formats = []
+ for source in sources:
+ src = source.get('src')
+ if not src or not isinstance(src, compat_str):
+ continue
+ src = compat_urlparse.urljoin(url, src)
+ src_type = source.get('type')
+ if isinstance(src_type, compat_str):
+ src_type = src_type.lower()
+ ext = determine_ext(src).lower()
+ if src_type == 'video/youtube':
+ return self.url_result(src, YoutubeIE.ie_key())
+ if src_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ elif src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ 'ext': (mimetype2ext(src_type)
+ or ext if ext in KNOWN_EXTENSIONS else 'mp4'),
+ })
+ if formats:
+ self._sort_formats(formats)
+ info_dict['formats'] = formats
+ return info_dict
+
+ # Looking for http://schema.org/VideoObject
+ json_ld = self._search_json_ld(
+ webpage, video_id, default={}, expected_type='VideoObject')
+ if json_ld.get('url'):
+ return merge_dicts(json_ld, info_dict)
+
+ def check_video(vurl):
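+ # Accept YouTube and RTMP URLs outright; otherwise require a path that
+ # has an extension and is not a static image/subtitle/script resource.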
+ if YoutubeIE.suitable(vurl):
+ return True
+ if RtmpIE.suitable(vurl):
+ return True
+ vpath = compat_urlparse.urlparse(vurl).path
+ vext = determine_ext(vpath)
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js', 'xml')
+
+ def filter_video(urls):
+ return list(filter(check_video, urls))
+
+ # Start with something easy: JW Player in SWFObject
+ found = filter_video(re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage))
+ if not found:
+ # Look for gorilla-vid style embedding
+ found = filter_video(re.findall(r'''(?sx)
+ (?:
+ jw_plugins|
+ JWPlayerOptions|
+ jwplayer\s*\(\s*["'][^'"]+["']\s*\)\s*\.setup
+ )
+ .*?
+ ['"]?file['"]?\s*:\s*["\'](.*?)["\']''', webpage))
+ if not found:
+ # Broaden the search a little bit
+ found = filter_video(re.findall(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage))
+ if not found:
+ # Broaden the findall a little bit: JWPlayer JS loader
+ found = filter_video(re.findall(
+ r'[^A-Za-z0-9]?(?:file|video_url)["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage))
+ if not found:
+ # Flow player
+ found = filter_video(re.findall(r'''(?xs)
+ flowplayer\("[^"]+",\s*
+ \{[^}]+?\}\s*,
+ \s*\{[^}]+? ["']?clip["']?\s*:\s*\{\s*
+ ["']?url["']?\s*:\s*["']([^"']+)["']
+ ''', webpage))
+ if not found:
+ # Cinerama player
+ found = re.findall(
+ r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
+ if not found:
+ # Try to find twitter cards info
+ # twitter:player:stream should be checked before twitter:player since
+ # it is expected to contain a raw stream (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ found = filter_video(re.findall(
+ r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
+ if not found:
+ # We look for Open Graph info:
+ # We have to match any number of spaces between elements; some sites try to align them (e.g. statigr.am)
+ m_video_type = re.findall(r'<meta.*?property="og:video:type".*?content="video/(.*?)"', webpage)
+ # Only look in og:video if the MIME type is a video; don't try if it's a Flash player:
+ if m_video_type:
+ found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
+ if not found:
+ REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
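+ # Matches meta-refresh payloads such as (hypothetical):
+ # 0;URL='https://example.com/video.html'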
+ found = re.search(
+ r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
+ r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
+ webpage)
+ if not found:
+ # Look also in Refresh HTTP header
+ refresh_header = head_response.headers.get('Refresh')
+ if refresh_header:
+ # In Python 2, response HTTP headers are bytestrings
+ if sys.version_info < (3, 0) and isinstance(refresh_header, str):
+ refresh_header = refresh_header.decode('iso-8859-1')
+ found = re.search(REDIRECT_REGEX, refresh_header)
+ if found:
+ new_url = compat_urlparse.urljoin(url, unescapeHTML(found.group(1)))
+ if new_url != url:
+ self.report_following_redirect(new_url)
+ return {
+ '_type': 'url',
+ 'url': new_url,
+ }
+ else:
+ found = None
+
+ if not found:
+ # twitter:player is an https URL to an iframe player that may or may not
+ # be supported by youtube-dlc and is therefore checked last (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ embed_url = self._html_search_meta('twitter:player', webpage, default=None)
+ if embed_url and embed_url != url:
+ return self.url_result(embed_url)
+
+ if not found:
+ raise UnsupportedError(url)
+
+ entries = []
+ for video_url in orderedSet(found):
+ video_url = unescapeHTML(video_url)
+ video_url = video_url.replace('\\/', '/')
+ video_url = compat_urlparse.urljoin(url, video_url)
+ video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
+
+ # Sometimes, jwplayer extraction will result in a YouTube URL
+ if YoutubeIE.suitable(video_url):
+ entries.append(self.url_result(video_url, 'Youtube'))
+ continue
+
+ # Strip the extension so the basename can serve as the video id:
+ video_id = os.path.splitext(video_id)[0]
+
+ entry_info_dict = {
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'title': video_title,
+ 'age_limit': age_limit,
+ }
+
+ if RtmpIE.suitable(video_url):
+ entry_info_dict.update({
+ '_type': 'url_transparent',
+ 'ie_key': RtmpIE.ie_key(),
+ 'url': video_url,
+ })
+ entries.append(entry_info_dict)
+ continue
+
+ ext = determine_ext(video_url)
+ if ext == 'smil':
+ entry_info_dict['formats'] = self._extract_smil_formats(video_url, video_id)
+ elif ext == 'xspf':
+ return self.playlist_result(self._extract_xspf_playlist(video_url, video_id), video_id)
+ elif ext == 'm3u8':
+ entry_info_dict['formats'] = self._extract_m3u8_formats(video_url, video_id, ext='mp4')
+ elif ext == 'mpd':
+ entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
+ elif ext == 'f4m':
+ entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
+ elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
+ # Just matching .ism/manifest is not enough to be reliably sure
+ # whether it's actually an ISM manifest or some other streaming
+ # manifest since there are various streaming URL formats
+ # possible (see [1]) as well as some other shenanigans like
+ # .smil/manifest URLs that actually serve an ISM (see [2]) and
+ # so on.
+ # Thus the most reasonable way to solve this is to delegate
+ # to generic extractor in order to look into the contents of
+ # the manifest itself.
+ # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
+ # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
+ entry_info_dict = self.url_result(
+ smuggle_url(video_url, {'to_generic': True}),
+ GenericIE.ie_key())
+ else:
+ entry_info_dict['url'] = video_url
+
+ if entry_info_dict.get('formats'):
+ self._sort_formats(entry_info_dict['formats'])
+
+ entries.append(entry_info_dict)
+
+ if len(entries) == 1:
+ return entries[0]
+ else:
+ for num, e in enumerate(entries, start=1):
+ # 'url' results don't have a title
+ if e.get('title') is not None:
+ e['title'] = '%s (%d)' % (e['title'], num)
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ }
diff --git a/youtube_dl/extractor/gfycat.py b/youtube_dlc/extractor/gfycat.py
index 18a30fe67..18a30fe67 100644
--- a/youtube_dl/extractor/gfycat.py
+++ b/youtube_dlc/extractor/gfycat.py
diff --git a/youtube_dlc/extractor/giantbomb.py b/youtube_dlc/extractor/giantbomb.py
new file mode 100644
index 000000000..c6477958d
--- /dev/null
+++ b/youtube_dlc/extractor/giantbomb.py
@@ -0,0 +1,92 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ qualities,
+ unescapeHTML,
+)
+
+
+class GiantBombIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?giantbomb\.com/(?:videos|shows)/(?P<display_id>[^/]+)/(?P<id>\d+-\d+)'
+ _TESTS = [{
+ 'url': 'http://www.giantbomb.com/videos/quick-look-destiny-the-dark-below/2300-9782/',
+ 'md5': '132f5a803e7e0ab0e274d84bda1e77ae',
+ 'info_dict': {
+ 'id': '2300-9782',
+ 'display_id': 'quick-look-destiny-the-dark-below',
+ 'ext': 'mp4',
+ 'title': 'Quick Look: Destiny: The Dark Below',
+ 'description': 'md5:0aa3aaf2772a41b91d44c63f30dfad24',
+ 'duration': 2399,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ }
+ }, {
+ 'url': 'https://www.giantbomb.com/shows/ben-stranding/2970-20212',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._og_search_title(webpage)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ video = json.loads(unescapeHTML(self._search_regex(
+ r'data-video="([^"]+)"', webpage, 'data-video')))
+
+ duration = int_or_none(video.get('lengthSeconds'))
+
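+ # qualities() ranks these known stream names from worst to best;
+ # quality(format_id) then yields a sort weight for each format.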
+ quality = qualities([
+ 'f4m_low', 'progressive_low', 'f4m_high',
+ 'progressive_high', 'f4m_hd', 'progressive_hd'])
+
+ formats = []
+ for format_id, video_url in video['videoStreams'].items():
+ if format_id == 'f4m_stream':
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ f4m_formats = self._extract_f4m_formats(video_url + '?hdcore=3.3.1', display_id)
+ if f4m_formats:
+ f4m_formats[0]['quality'] = quality(format_id)
+ formats.extend(f4m_formats)
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, display_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
+
+ if not formats:
+ youtube_id = video.get('youtubeID')
+ if youtube_id:
+ return self.url_result(youtube_id, 'Youtube')
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/giga.py b/youtube_dlc/extractor/giga.py
index 5a9992a27..5a9992a27 100644
--- a/youtube_dl/extractor/giga.py
+++ b/youtube_dlc/extractor/giga.py
diff --git a/youtube_dl/extractor/gigya.py b/youtube_dlc/extractor/gigya.py
index 412178492..412178492 100644
--- a/youtube_dl/extractor/gigya.py
+++ b/youtube_dlc/extractor/gigya.py
diff --git a/youtube_dl/extractor/glide.py b/youtube_dlc/extractor/glide.py
index d94dfbf09..d94dfbf09 100644
--- a/youtube_dl/extractor/glide.py
+++ b/youtube_dlc/extractor/glide.py
diff --git a/youtube_dlc/extractor/globo.py b/youtube_dlc/extractor/globo.py
new file mode 100644
index 000000000..60d842d3a
--- /dev/null
+++ b/youtube_dlc/extractor/globo.py
@@ -0,0 +1,247 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import hashlib
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ orderedSet,
+ str_or_none,
+)
+
+
+class GloboIE(InfoExtractor):
+ _VALID_URL = r'(?:globo:|https?://.+?\.globo\.com/(?:[^/]+/)*(?:v/(?:[^/]+/)?|videos/))(?P<id>\d{7,})'
+ _NETRC_MACHINE = 'globo'
+ _TESTS = [{
+ 'url': 'http://g1.globo.com/carros/autoesporte/videos/t/exclusivos-do-g1/v/mercedes-benz-gla-passa-por-teste-de-colisao-na-europa/3607726/',
+ 'md5': 'b3ccc801f75cd04a914d51dadb83a78d',
+ 'info_dict': {
+ 'id': '3607726',
+ 'ext': 'mp4',
+ 'title': 'Mercedes-Benz GLA passa por teste de colisão na Europa',
+ 'duration': 103.204,
+ 'uploader': 'Globo.com',
+ 'uploader_id': '265',
+ },
+ }, {
+ 'url': 'http://globoplay.globo.com/v/4581987/',
+ 'md5': 'f36a1ecd6a50da1577eee6dd17f67eff',
+ 'info_dict': {
+ 'id': '4581987',
+ 'ext': 'mp4',
+ 'title': 'Acidentes de trânsito estão entre as maiores causas de queda de energia em SP',
+ 'duration': 137.973,
+ 'uploader': 'Rede Globo',
+ 'uploader_id': '196',
+ },
+ }, {
+ 'url': 'http://canalbrasil.globo.com/programas/sangue-latino/videos/3928201.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://globosatplay.globo.com/globonews/v/4472924/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://globotv.globo.com/t/programa/v/clipe-sexo-e-as-negas-adeus/3836166/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://canaloff.globo.com/programas/desejar-profundo/videos/4518560.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'globo:3607726',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ email, password = self._get_login_info()
+ if email is None:
+ return
+
+ try:
+ glb_id = (self._download_json(
+ 'https://login.globo.com/api/authentication', None, data=json.dumps({
+ 'payload': {
+ 'email': email,
+ 'password': password,
+ 'serviceId': 4654,
+ },
+ }).encode(), headers={
+ 'Content-Type': 'application/json; charset=utf-8',
+ }) or {}).get('glbId')
+ if glb_id:
+ self._set_cookie('.globo.com', 'GLBID', glb_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ resp = self._parse_json(e.cause.read(), None)
+ raise ExtractorError(resp.get('userMessage') or resp['id'], expected=True)
+ raise
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'http://api.globovideos.com/videos/%s/playlist' % video_id,
+ video_id)['videos'][0]
+ if video.get('encrypted') is True:
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ title = video['title']
+
+ formats = []
+ subtitles = {}
+ for resource in video['resources']:
+ resource_id = resource.get('_id')
+ resource_url = resource.get('url')
+ resource_type = resource.get('type')
+ if not resource_url or (resource_type == 'media' and not resource_id) or resource_type not in ('subtitle', 'media'):
+ continue
+
+ if resource_type == 'subtitle':
+ subtitles.setdefault(resource.get('language') or 'por', []).append({
+ 'url': resource_url,
+ })
+ continue
+
+ security = self._download_json(
+ 'http://security.video.globo.com/videos/%s/hash' % video_id,
+ video_id, 'Downloading security hash for %s' % resource_id, query={
+ 'player': 'desktop',
+ 'version': '5.19.1',
+ 'resource_id': resource_id,
+ })
+
+ security_hash = security.get('hash')
+ if not security_hash:
+ message = security.get('message')
+ if message:
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, message), expected=True)
+ continue
+
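+ # Undocumented signing scheme (as reverse-engineered below): split the
+ # server hash into prefix/timestamp/MD5 parts, extend the timestamp by
+ # 24h plus random padding, then append an urlsafe-b64 MD5 digest over
+ # (received_md5 + padded_sign_time + '0xAC10FD').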
+ hash_code = security_hash[:2]
+ padding = '%010d' % random.randint(1, 10000000000)
+ if hash_code in ('04', '14'):
+ received_time = security_hash[3:13]
+ received_md5 = security_hash[24:]
+ hash_prefix = security_hash[:23]
+ elif hash_code in ('02', '12', '03', '13'):
+ received_time = security_hash[2:12]
+ received_md5 = security_hash[22:]
+ padding += '1'
+ hash_prefix = '05' + security_hash[:22]
+ else:
+ # Unknown hash layout; skip instead of raising UnboundLocalError below
+ continue
+
+ padded_sign_time = compat_str(int(received_time) + 86400) + padding
+ md5_data = (received_md5 + padded_sign_time + '0xAC10FD').encode()
+ signed_md5 = base64.urlsafe_b64encode(hashlib.md5(md5_data).digest()).decode().strip('=')
+ signed_hash = hash_prefix + padded_sign_time + signed_md5
+ signed_url = '%s?h=%s&k=html5&a=%s&u=%s' % (resource_url, signed_hash, 'F' if video.get('subscriber_only') else 'A', security.get('user') or '')
+
+ if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ signed_url, resource_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif resource_id.endswith('mpd') or resource_url.endswith('.mpd'):
+ formats.extend(self._extract_mpd_formats(
+ signed_url, resource_id, mpd_id='dash', fatal=False))
+ elif resource_id.endswith('manifest') or resource_url.endswith('/manifest'):
+ formats.extend(self._extract_ism_formats(
+ signed_url, resource_id, ism_id='mss', fatal=False))
+ else:
+ formats.append({
+ 'url': signed_url,
+ 'format_id': 'http-%s' % resource_id,
+ 'height': int_or_none(resource.get('height')),
+ })
+
+ self._sort_formats(formats)
+
+ duration = float_or_none(video.get('duration'), 1000)
+ uploader = video.get('channel')
+ uploader_id = str_or_none(video.get('channel_id'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'duration': duration,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class GloboArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://.+?\.globo\.com/(?:[^/]+/)*(?P<id>[^/.]+)(?:\.html)?'
+
+ _VIDEOID_REGEXES = [
+ r'\bdata-video-id=["\'](\d{7,})',
+ r'\bdata-player-videosids=["\'](\d{7,})',
+ r'\bvideosIDs\s*:\s*["\']?(\d{7,})',
+ r'\bdata-id=["\'](\d{7,})',
+ r'<div[^>]+\bid=["\'](\d{7,})',
+ ]
+
+ _TESTS = [{
+ 'url': 'http://g1.globo.com/jornal-nacional/noticia/2014/09/novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes.html',
+ 'info_dict': {
+ 'id': 'novidade-na-fiscalizacao-de-bagagem-pela-receita-provoca-discussoes',
+ 'title': 'Novidade na fiscalização de bagagem pela Receita provoca discussões',
+ 'description': 'md5:c3c4b4d4c30c32fce460040b1ac46b12',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'http://g1.globo.com/pr/parana/noticia/2016/09/mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato.html',
+ 'info_dict': {
+ 'id': 'mpf-denuncia-lula-marisa-e-mais-seis-na-operacao-lava-jato',
+ 'title': "Lula era o 'comandante máximo' do esquema da Lava Jato, diz MPF",
+ 'description': 'md5:8aa7cc8beda4dc71cc8553e00b77c54c',
+ },
+ 'playlist_count': 6,
+ }, {
+ 'url': 'http://gq.globo.com/Prazeres/Poder/noticia/2015/10/all-o-desafio-assista-ao-segundo-capitulo-da-serie.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://gshow.globo.com/programas/tv-xuxa/O-Programa/noticia/2014/01/xuxa-e-junno-namoram-muuuito-em-luau-de-zeze-di-camargo-e-luciano.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://oglobo.globo.com/rio/a-amizade-entre-um-entregador-de-farmacia-um-piano-19946271',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if GloboIE.suitable(url) else super(GloboArticleIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ video_ids = []
+ for video_regex in self._VIDEOID_REGEXES:
+ video_ids.extend(re.findall(video_regex, webpage))
+ entries = [
+ self.url_result('globo:%s' % video_id, GloboIE.ie_key())
+ for video_id in orderedSet(video_ids)]
+ title = self._og_search_title(webpage, fatal=False)
+ description = self._html_search_meta('description', webpage)
+ return self.playlist_result(entries, display_id, title, description)
diff --git a/youtube_dlc/extractor/go.py b/youtube_dlc/extractor/go.py
new file mode 100644
index 000000000..03cfba91f
--- /dev/null
+++ b/youtube_dlc/extractor/go.py
@@ -0,0 +1,270 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .adobepass import AdobePassIE
+from ..utils import (
+ int_or_none,
+ determine_ext,
+ parse_age_limit,
+ urlencode_postdata,
+ ExtractorError,
+)
+
+
+class GoIE(AdobePassIE):
+ _SITE_INFO = {
+ 'abc': {
+ 'brand': '001',
+ 'requestor_id': 'ABC',
+ },
+ 'freeform': {
+ 'brand': '002',
+ 'requestor_id': 'ABCFamily',
+ },
+ 'watchdisneychannel': {
+ 'brand': '004',
+ 'resource_id': 'Disney',
+ },
+ 'watchdisneyjunior': {
+ 'brand': '008',
+ 'resource_id': 'DisneyJunior',
+ },
+ 'watchdisneyxd': {
+ 'brand': '009',
+ 'resource_id': 'DisneyXD',
+ },
+ 'disneynow': {
+ 'brand': '011',
+ 'resource_id': 'Disney',
+ }
+ }
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?P<sub_domain>%s)\.)?go|
+ (?P<sub_domain_2>abc|freeform|disneynow)
+ )\.com/
+ (?:
+ (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
+ (?:[^/]+/)*(?P<display_id>[^/?\#]+)
+ )
+ ''' % '|'.join(list(_SITE_INFO.keys()))
+ _TESTS = [{
+ 'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
+ 'info_dict': {
+ 'id': 'VDKA3807643',
+ 'ext': 'mp4',
+ 'title': 'The Traitor in the White House',
+ 'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'This content is no longer available.',
+ }, {
+ 'url': 'http://watchdisneyxd.go.com/doraemon',
+ 'info_dict': {
+ 'title': 'Doraemon',
+ 'id': 'SH55574025',
+ },
+ 'playlist_mincount': 51,
+ }, {
+ 'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
+ 'info_dict': {
+ 'id': 'VDKA3609139',
+ 'ext': 'mp4',
+ 'title': 'This Guilty Blood',
+ 'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet',
+ 'info_dict': {
+ 'id': 'VDKA13435179',
+ 'ext': 'mp4',
+ 'title': 'The Bet',
+ 'description': 'md5:c66de8ba2e92c6c5c113c3ade84ab404',
+ 'age_limit': 14,
+ },
+ 'params': {
+ 'geo_bypass_ip_block': '3.244.239.0/24',
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
+ 'only_matching': True,
+ }, {
+ # brand 004
+ 'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
+ 'only_matching': True,
+ }, {
+ # brand 008
+ 'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
+ 'only_matching': True,
+ }]
+
+ def _extract_videos(self, brand, video_id='-1', show_id='-1'):
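+ # '-1' appears to act as a wildcard path segment in the watchabc
+ # contents API, letting one endpoint serve video and show lookups.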
+ display_id = video_id if video_id != '-1' else show_id
+ return self._download_json(
+ 'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/%s/001/-1/%s/-1/%s/-1/-1.json' % (brand, show_id, video_id),
+ display_id)['video']
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ sub_domain = mobj.group('sub_domain') or mobj.group('sub_domain_2')
+ video_id, display_id = mobj.group('id', 'display_id')
+ site_info = self._SITE_INFO.get(sub_domain, {})
+ brand = site_info.get('brand')
+ if not video_id or not site_info:
+ webpage = self._download_webpage(url, display_id or video_id)
+ video_id = self._search_regex(
+ (
+ # There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
+ # from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
+ r'data-video-id=["\']*(VDKA\w+)',
+ # https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
+ r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)'
+ ), webpage, 'video id', default=video_id)
+ if not site_info:
+ brand = self._search_regex(
+ (r'data-brand=\s*["\']\s*(\d+)',
+ r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
+ default='004')
+ site_info = next(
+ si for si in self._SITE_INFO.values()
+ if si.get('brand') == brand)
+ if not video_id:
+ # show extraction works for Disney, DisneyJunior and DisneyXD
+ # ABC and Freeform have a different layout
+ show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
+ videos = self._extract_videos(brand, show_id=show_id)
+ show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
+ entries = []
+ for video in videos:
+ entries.append(self.url_result(
+ video['url'], 'Go', video.get('id'), video.get('title')))
+ entries.reverse()
+ return self.playlist_result(entries, show_id, show_title)
+ video_data = self._extract_videos(brand, video_id)[0]
+ video_id = video_data['id']
+ title = video_data['title']
+
+ formats = []
+ for asset in video_data.get('assets', {}).get('asset', []):
+ asset_url = asset.get('value')
+ if not asset_url:
+ continue
+ format_id = asset.get('format')
+ ext = determine_ext(asset_url)
+ if ext == 'm3u8':
+ video_type = video_data.get('type')
+ data = {
+ 'video_id': video_data['id'],
+ 'video_type': video_type,
+ 'brand': brand,
+ 'device': '001',
+ }
+ if video_data.get('accesslevel') == '1':
+ requestor_id = site_info.get('requestor_id', 'DisneyChannels')
+ resource = site_info.get('resource_id') or self._get_mvpd_resource(
+ requestor_id, title, video_id, None)
+ auth = self._extract_mvpd_auth(
+ url, video_id, requestor_id, resource)
+ data.update({
+ 'token': auth,
+ 'token_type': 'ap',
+ 'adobe_requestor_id': requestor_id,
+ })
+ else:
+ self._initialize_geo_bypass({'countries': ['US']})
+ entitlement = self._download_json(
+ 'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
+ video_id, data=urlencode_postdata(data))
+ errors = entitlement.get('errors', {}).get('errors', [])
+ if errors:
+ for error in errors:
+ if error.get('code') == 1002:
+ self.raise_geo_restricted(
+ error['message'], countries=['US'])
+ error_message = ', '.join([error['message'] for error in errors])
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error_message), expected=True)
+ asset_url += '?' + entitlement['uplynkData']['sessionKey']
+ formats.extend(self._extract_m3u8_formats(
+ asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False))
+ else:
+ f = {
+ 'format_id': format_id,
+ 'url': asset_url,
+ 'ext': ext,
+ }
+ if re.search(r'(?:/mp4/source/|_source\.mp4)', asset_url):
+ f.update({
+ 'format_id': ('%s-' % format_id if format_id else '') + 'SOURCE',
+ 'preference': 1,
+ })
+ else:
+ mobj = re.search(r'/(\d+)x(\d+)/', asset_url)
+ if mobj:
+ height = int(mobj.group(2))
+ f.update({
+ 'format_id': ('%s-' % format_id if format_id else '') + '%dP' % height,
+ 'width': int(mobj.group(1)),
+ 'height': height,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for cc in video_data.get('closedcaption', {}).get('src', []):
+ cc_url = cc.get('value')
+ if not cc_url:
+ continue
+ ext = determine_ext(cc_url)
+ if ext == 'xml':
+ ext = 'ttml'
+ subtitles.setdefault(cc.get('lang'), []).append({
+ 'url': cc_url,
+ 'ext': ext,
+ })
+
+ thumbnails = []
+ for thumbnail in video_data.get('thumbnails', {}).get('thumbnail', []):
+ thumbnail_url = thumbnail.get('value')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('longdescription') or video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration', {}).get('value'), 1000),
+ 'age_limit': parse_age_limit(video_data.get('tvrating', {}).get('rating')),
+ 'episode_number': int_or_none(video_data.get('episodenumber')),
+ 'series': video_data.get('show', {}).get('title'),
+ 'season_number': int_or_none(video_data.get('season', {}).get('num')),
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/extractor/godtube.py b/youtube_dlc/extractor/godtube.py
index 92efd16b3..92efd16b3 100644
--- a/youtube_dl/extractor/godtube.py
+++ b/youtube_dlc/extractor/godtube.py
diff --git a/youtube_dl/extractor/golem.py b/youtube_dlc/extractor/golem.py
index 47a068e74..47a068e74 100644
--- a/youtube_dl/extractor/golem.py
+++ b/youtube_dlc/extractor/golem.py
diff --git a/youtube_dl/extractor/googledrive.py b/youtube_dlc/extractor/googledrive.py
index 589e4d5c3..589e4d5c3 100644
--- a/youtube_dl/extractor/googledrive.py
+++ b/youtube_dlc/extractor/googledrive.py
diff --git a/youtube_dl/extractor/googleplus.py b/youtube_dlc/extractor/googleplus.py
index 6b927bb44..6b927bb44 100644
--- a/youtube_dl/extractor/googleplus.py
+++ b/youtube_dlc/extractor/googleplus.py
diff --git a/youtube_dl/extractor/googlesearch.py b/youtube_dlc/extractor/googlesearch.py
index 5279fa807..5279fa807 100644
--- a/youtube_dl/extractor/googlesearch.py
+++ b/youtube_dlc/extractor/googlesearch.py
diff --git a/youtube_dl/extractor/goshgay.py b/youtube_dlc/extractor/goshgay.py
index 377981d3e..377981d3e 100644
--- a/youtube_dl/extractor/goshgay.py
+++ b/youtube_dlc/extractor/goshgay.py
diff --git a/youtube_dl/extractor/gputechconf.py b/youtube_dlc/extractor/gputechconf.py
index 73dc62c49..73dc62c49 100644
--- a/youtube_dl/extractor/gputechconf.py
+++ b/youtube_dlc/extractor/gputechconf.py
diff --git a/youtube_dl/extractor/groupon.py b/youtube_dlc/extractor/groupon.py
index a6da90931..a6da90931 100644
--- a/youtube_dl/extractor/groupon.py
+++ b/youtube_dlc/extractor/groupon.py
diff --git a/youtube_dl/extractor/hbo.py b/youtube_dlc/extractor/hbo.py
index 68df748f5..68df748f5 100644
--- a/youtube_dl/extractor/hbo.py
+++ b/youtube_dlc/extractor/hbo.py
diff --git a/youtube_dl/extractor/hearthisat.py b/youtube_dlc/extractor/hearthisat.py
index 18c252012..18c252012 100644
--- a/youtube_dl/extractor/hearthisat.py
+++ b/youtube_dlc/extractor/hearthisat.py
diff --git a/youtube_dl/extractor/heise.py b/youtube_dlc/extractor/heise.py
index cbe564a3c..cbe564a3c 100644
--- a/youtube_dl/extractor/heise.py
+++ b/youtube_dlc/extractor/heise.py
diff --git a/youtube_dlc/extractor/hellporno.py b/youtube_dlc/extractor/hellporno.py
new file mode 100644
index 000000000..fae425103
--- /dev/null
+++ b/youtube_dlc/extractor/hellporno.py
@@ -0,0 +1,76 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ merge_dicts,
+ remove_end,
+ unified_timestamp,
+)
+
+
+class HellPornoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?hellporno\.(?:com/videos|net/v)/(?P<id>[^/]+)'
+ _TESTS = [{
+ 'url': 'http://hellporno.com/videos/dixie-is-posing-with-naked-ass-very-erotic/',
+ 'md5': 'f0a46ebc0bed0c72ae8fe4629f7de5f3',
+ 'info_dict': {
+ 'id': '149116',
+ 'display_id': 'dixie-is-posing-with-naked-ass-very-erotic',
+ 'ext': 'mp4',
+ 'title': 'Dixie is posing with naked ass very erotic',
+ 'description': 'md5:9a72922749354edb1c4b6e540ad3d215',
+ 'categories': list,
+ 'thumbnail': r're:https?://.*\.jpg$',
+ 'duration': 240,
+ 'timestamp': 1398762720,
+ 'upload_date': '20140429',
+ 'view_count': int,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'http://hellporno.net/v/186271/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = remove_end(self._html_search_regex(
+ r'<title>([^<]+)</title>', webpage, 'title'), ' - Hell Porno')
+
+ info = self._parse_html5_media_entries(url, webpage, display_id)[0]
+ self._sort_formats(info['formats'])
+
+ video_id = self._search_regex(
+ (r'chs_object\s*=\s*["\'](\d+)',
+ r'params\[["\']video_id["\']\]\s*=\s*(\d+)'), webpage, 'video id',
+ default=display_id)
+ description = self._search_regex(
+ r'class=["\']desc_video_view_v2[^>]+>([^<]+)', webpage,
+ 'description', fatal=False)
+ categories = [
+ c.strip()
+ for c in self._html_search_meta(
+ 'keywords', webpage, 'categories', default='').split(',')
+ if c.strip()]
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, fatal=False))
+ timestamp = unified_timestamp(self._og_search_property(
+ 'video:release_date', webpage, fatal=False))
+ view_count = int_or_none(self._search_regex(
+ r'>Views\s+(\d+)', webpage, 'view count', fatal=False))
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'categories': categories,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'age_limit': 18,
+ })
diff --git a/youtube_dl/extractor/helsinki.py b/youtube_dlc/extractor/helsinki.py
index 575fb332a..575fb332a 100644
--- a/youtube_dl/extractor/helsinki.py
+++ b/youtube_dlc/extractor/helsinki.py
diff --git a/youtube_dl/extractor/hentaistigma.py b/youtube_dlc/extractor/hentaistigma.py
index 86a93de4d..86a93de4d 100644
--- a/youtube_dl/extractor/hentaistigma.py
+++ b/youtube_dlc/extractor/hentaistigma.py
diff --git a/youtube_dl/extractor/hgtv.py b/youtube_dlc/extractor/hgtv.py
index a4f332565..a4f332565 100644
--- a/youtube_dl/extractor/hgtv.py
+++ b/youtube_dlc/extractor/hgtv.py
diff --git a/youtube_dl/extractor/hidive.py b/youtube_dlc/extractor/hidive.py
index f26f80265..f26f80265 100644
--- a/youtube_dl/extractor/hidive.py
+++ b/youtube_dlc/extractor/hidive.py
diff --git a/youtube_dl/extractor/historicfilms.py b/youtube_dlc/extractor/historicfilms.py
index 56343e98f..56343e98f 100644
--- a/youtube_dl/extractor/historicfilms.py
+++ b/youtube_dlc/extractor/historicfilms.py
diff --git a/youtube_dl/extractor/hitbox.py b/youtube_dlc/extractor/hitbox.py
index 3e5ff2685..3e5ff2685 100644
--- a/youtube_dl/extractor/hitbox.py
+++ b/youtube_dlc/extractor/hitbox.py
diff --git a/youtube_dl/extractor/hitrecord.py b/youtube_dlc/extractor/hitrecord.py
index fd5dc2935..fd5dc2935 100644
--- a/youtube_dl/extractor/hitrecord.py
+++ b/youtube_dlc/extractor/hitrecord.py
diff --git a/youtube_dl/extractor/hketv.py b/youtube_dlc/extractor/hketv.py
index 1f3502b90..1f3502b90 100644
--- a/youtube_dl/extractor/hketv.py
+++ b/youtube_dlc/extractor/hketv.py
diff --git a/youtube_dl/extractor/hornbunny.py b/youtube_dlc/extractor/hornbunny.py
index c458a959d..c458a959d 100644
--- a/youtube_dl/extractor/hornbunny.py
+++ b/youtube_dlc/extractor/hornbunny.py
diff --git a/youtube_dl/extractor/hotnewhiphop.py b/youtube_dlc/extractor/hotnewhiphop.py
index 4703e1894..4703e1894 100644
--- a/youtube_dl/extractor/hotnewhiphop.py
+++ b/youtube_dlc/extractor/hotnewhiphop.py
diff --git a/youtube_dlc/extractor/hotstar.py b/youtube_dlc/extractor/hotstar.py
new file mode 100644
index 000000000..f97eefa3d
--- /dev/null
+++ b/youtube_dlc/extractor/hotstar.py
@@ -0,0 +1,217 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import hmac
+import re
+import time
+import uuid
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+)
+
+
+class HotStarBaseIE(InfoExtractor):
+ _AKAMAI_ENCRYPTION_KEY = b'\x05\xfc\x1a\x01\xca\xc9\x4b\xc4\x12\xfc\x53\x12\x07\x75\xf9\xee'
+
+ def _call_api_impl(self, path, video_id, query):
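+ # Presumably an Akamai-style edge-auth token: a validity window plus
+ # ACL, signed with HMAC-SHA256 under a hard-coded key, e.g. (hypothetical):
+ # st=1600000000~exp=1600006000~acl=/*~hmac=<hex digest>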
+ st = int(time.time())
+ exp = st + 6000
+ auth = 'st=%d~exp=%d~acl=/*' % (st, exp)
+ auth += '~hmac=' + hmac.new(self._AKAMAI_ENCRYPTION_KEY, auth.encode(), hashlib.sha256).hexdigest()
+ response = self._download_json(
+ 'https://api.hotstar.com/' + path, video_id, headers={
+ 'hotstarauth': auth,
+ 'x-country-code': 'IN',
+ 'x-platform-code': 'JIO',
+ }, query=query)
+ if response['statusCode'] != 'OK':
+ raise ExtractorError(
+ response['body']['message'], expected=True)
+ return response['body']['results']
+
+ def _call_api(self, path, video_id, query_name='contentId'):
+ return self._call_api_impl(path, video_id, {
+ query_name: video_id,
+ 'tas': 10000,
+ })
+
+ def _call_api_v2(self, path, video_id):
+ return self._call_api_impl(
+ '%s/in/contents/%s' % (path, video_id), video_id, {
+ 'desiredConfig': 'encryption:plain;ladder:phone,tv;package:hls,dash',
+ 'client': 'mweb',
+ 'clientVersion': '6.18.0',
+ 'deviceId': compat_str(uuid.uuid4()),
+ 'osName': 'Windows',
+ 'osVersion': '10',
+ })
+
+
+class HotStarIE(HotStarBaseIE):
+ IE_NAME = 'hotstar'
+ _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
+ _TESTS = [{
+ # contentData
+ 'url': 'https://www.hotstar.com/can-you-not-spread-rumours/1000076273',
+ 'info_dict': {
+ 'id': '1000076273',
+ 'ext': 'mp4',
+ 'title': 'Can You Not Spread Rumours?',
+ 'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
+ 'timestamp': 1447248600,
+ 'upload_date': '20151111',
+ 'duration': 381,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ # contentDetail
+ 'url': 'https://www.hotstar.com/movies/radha-gopalam/1000057157',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.hotstar.com/1000000515',
+ 'only_matching': True,
+ }, {
+ # only available via api v2
+ 'url': 'https://www.hotstar.com/tv/ek-bhram-sarvagun-sampanna/s-2116/janhvi-targets-suman/1000234847',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+ app_state = self._parse_json(self._search_regex(
+ r'<script>window\.APP_STATE\s*=\s*({.+?})</script>',
+ webpage, 'app state'), video_id)
+ video_data = {}
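+ # APP_STATE maps opaque keys to state slices; probe every slice for a
+ # contentData/contentDetail payload whose contentId matches ours.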
+ getters = list(
+ lambda x, k=k: x['initialState']['content%s' % k]['content']
+ for k in ('Data', 'Detail')
+ )
+ for v in app_state.values():
+ content = try_get(v, getters, dict)
+ if content and content.get('contentId') == video_id:
+ video_data = content
+ break
+
+ title = video_data['title']
+
+ if video_data.get('drmProtected'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ headers = {'Referer': url}
+ formats = []
+ geo_restricted = False
+ playback_sets = self._call_api_v2('h/v2/play', video_id)['playBackSets']
+ for playback_set in playback_sets:
+ if not isinstance(playback_set, dict):
+ continue
+ format_url = url_or_none(playback_set.get('playbackUrl'))
+ if not format_url:
+ continue
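+ # Rewrite staragvod<N> hosts to staragvodweb<N>; the web variants
+ # appear to serve the same streams to browser clients.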
+ format_url = re.sub(
+ r'(?<=//staragvod)(\d)', r'web\1', format_url)
+ tags = str_or_none(playback_set.get('tagsCombination')) or ''
+ if tags and 'encryption:plain' not in tags:
+ continue
+ ext = determine_ext(format_url)
+ try:
+ if 'package:hls' in tags or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native',
+ m3u8_id='hls', headers=headers))
+ elif 'package:dash' in tags or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', headers=headers))
+ elif ext == 'f4m':
+ # f4m URLs produce broken files; skip them
+ pass
+ else:
+ formats.append({
+ 'url': format_url,
+ 'width': int_or_none(playback_set.get('width')),
+ 'height': int_or_none(playback_set.get('height')),
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ geo_restricted = True
+ continue
+ if not formats and geo_restricted:
+ self.raise_geo_restricted(countries=['IN'])
+ self._sort_formats(formats)
+
+ for f in formats:
+ f.setdefault('http_headers', {}).update(headers)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'timestamp': int_or_none(video_data.get('broadcastDate') or video_data.get('startDate')),
+ 'formats': formats,
+ 'channel': video_data.get('channelName'),
+ 'channel_id': video_data.get('channelId'),
+ 'series': video_data.get('showName'),
+ 'season': video_data.get('seasonName'),
+ 'season_number': int_or_none(video_data.get('seasonNo')),
+ 'season_id': video_data.get('seasonId'),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('episodeNo')),
+ }
+
+
+class HotStarPlaylistIE(HotStarBaseIE):
+ IE_NAME = 'hotstar:playlist'
+ _VALID_URL = r'https?://(?:www\.)?hotstar\.com/tv/[^/]+/s-\w+/list/[^/]+/t-(?P<id>\w+)'
+ _TESTS = [{
+ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/popular-clips/t-3_2_26',
+ 'info_dict': {
+ 'id': '3_2_26',
+ },
+ 'playlist_mincount': 20,
+ }, {
+ 'url': 'https://www.hotstar.com/tv/savdhaan-india/s-26/list/extras/t-2480',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ collection = self._call_api('o/v1/tray/find', playlist_id, 'uqId')
+
+ entries = [
+ self.url_result(
+ 'https://www.hotstar.com/%s' % video['contentId'],
+ ie=HotStarIE.ie_key(), video_id=video['contentId'])
+ for video in collection['assets']['items']
+ if video.get('contentId')]
+
+ return self.playlist_result(entries, playlist_id)
diff --git a/youtube_dl/extractor/howcast.py b/youtube_dlc/extractor/howcast.py
index 7e36b85ad..7e36b85ad 100644
--- a/youtube_dl/extractor/howcast.py
+++ b/youtube_dlc/extractor/howcast.py
diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dlc/extractor/howstuffworks.py
index cf90ab3c9..cf90ab3c9 100644
--- a/youtube_dl/extractor/howstuffworks.py
+++ b/youtube_dlc/extractor/howstuffworks.py
diff --git a/youtube_dlc/extractor/hrfensehen.py b/youtube_dlc/extractor/hrfensehen.py
new file mode 100644
index 000000000..805345e69
--- /dev/null
+++ b/youtube_dlc/extractor/hrfensehen.py
@@ -0,0 +1,104 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import int_or_none, unified_timestamp, unescapeHTML
+
+
+class HRFernsehenIE(InfoExtractor):
+ IE_NAME = 'hrfernsehen'
+ _VALID_URL = r'^https?://www\.(?:hr-fernsehen|hessenschau)\.de/.*,video-(?P<id>[0-9]{6})\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.hessenschau.de/tv-sendung/hessenschau-vom-26082020,video-130546.html',
+ 'md5': '5c4e0ba94677c516a2f65a84110fc536',
+ 'info_dict': {
+ 'id': '130546',
+ 'ext': 'mp4',
+ 'description': 'Sturmtief Kirsten fegt über Hessen / Die Corona-Pandemie – eine Chronologie / '
+ 'Sterbehilfe: Die Lage in Hessen / Miss Hessen leitet zwei eigene Unternehmen / '
+ 'Pop-Up Museum zeigt Schwarze Unterhaltung und Black Music',
+ 'subtitles': {'de': [{
+ 'url': 'https://hr-a.akamaihd.net/video/as/hessenschau/2020_08/hrLogo_200826200407_L385592_512x288-25p-500kbit.vtt'
+ }]},
+ 'timestamp': 1598470200,
+ 'upload_date': '20200826',
+ 'thumbnails': [{
+ 'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9.jpg',
+ 'id': '0'
+ }, {
+ 'url': 'https://www.hessenschau.de/tv-sendung/hs_ganz-1554~_t-1598465545029_v-16to9__medium.jpg',
+ 'id': '1'
+ }],
+ 'title': 'hessenschau vom 26.08.2020'
+ }
+ }, {
+ 'url': 'https://www.hr-fernsehen.de/sendungen-a-z/mex/sendungen/fair-und-gut---was-hinter-aldis-eigenem-guetesiegel-steckt,video-130544.html',
+ 'only_matching': True
+ }]
+
+ _GEO_COUNTRIES = ['DE']
+
+ def extract_airdate(self, loader_data):
+ airdate_str = loader_data.get('mediaMetadata', {}).get('agf', {}).get('airdate')
+
+ if airdate_str is None:
+ return None
+
+ return unified_timestamp(airdate_str)
+
+ def extract_formats(self, loader_data):
+ stream_formats = []
+ for stream_obj in loader_data["videoResolutionLevels"]:
+ stream_format = {
+ 'format_id': str(stream_obj['verticalResolution']) + "p",
+ 'height': stream_obj['verticalResolution'],
+ 'url': stream_obj['url'],
+ }
+
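+ # Stream URLs appear to encode quality hints, e.g. (hypothetical):
+ # ..._1280x720-50p-3200kbit.mp4 -> 1280x720, 50 fps, 3200 kbit/s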
+ quality_information = re.search(r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
+ stream_obj['url'])
+ if quality_information:
+ stream_format['width'] = int_or_none(quality_information.group(1))
+ stream_format['height'] = int_or_none(quality_information.group(2))
+ stream_format['fps'] = int_or_none(quality_information.group(3))
+ stream_format['tbr'] = int_or_none(quality_information.group(4))
+
+ stream_formats.append(stream_format)
+
+ self._sort_formats(stream_formats)
+ return stream_formats
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title', 'name'], webpage)
+ description = self._html_search_meta(
+ ['description'], webpage)
+
+ loader_str = unescapeHTML(self._search_regex(r"data-hr-mediaplayer-loader='([^']*)'", webpage, 'hr media loader'))
+ loader_data = json.loads(loader_str)
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'formats': self.extract_formats(loader_data),
+ 'timestamp': self.extract_airdate(loader_data)
+ }
+
+ if "subtitle" in loader_data:
+ info["subtitles"] = {"de": [{"url": loader_data["subtitle"]}]}
+
+ thumbnails = list(set([t for t in loader_data.get("previewImageUrl", {}).values()]))
+ if len(thumbnails) > 0:
+ info["thumbnails"] = [{"url": t} for t in thumbnails]
+
+ return info
diff --git a/youtube_dl/extractor/hrti.py b/youtube_dlc/extractor/hrti.py
index 23f7b1fc9..23f7b1fc9 100644
--- a/youtube_dl/extractor/hrti.py
+++ b/youtube_dlc/extractor/hrti.py
diff --git a/youtube_dl/extractor/huajiao.py b/youtube_dlc/extractor/huajiao.py
index 4ca275dda..4ca275dda 100644
--- a/youtube_dl/extractor/huajiao.py
+++ b/youtube_dlc/extractor/huajiao.py
diff --git a/youtube_dl/extractor/huffpost.py b/youtube_dlc/extractor/huffpost.py
index 97e36f056..97e36f056 100644
--- a/youtube_dl/extractor/huffpost.py
+++ b/youtube_dlc/extractor/huffpost.py
diff --git a/youtube_dl/extractor/hungama.py b/youtube_dlc/extractor/hungama.py
index 3fdaac5b6..3fdaac5b6 100644
--- a/youtube_dl/extractor/hungama.py
+++ b/youtube_dlc/extractor/hungama.py
diff --git a/youtube_dl/extractor/hypem.py b/youtube_dlc/extractor/hypem.py
index 9ca28d632..9ca28d632 100644
--- a/youtube_dl/extractor/hypem.py
+++ b/youtube_dlc/extractor/hypem.py
diff --git a/youtube_dl/extractor/ign.py b/youtube_dlc/extractor/ign.py
index a96ea8010..a96ea8010 100644
--- a/youtube_dl/extractor/ign.py
+++ b/youtube_dlc/extractor/ign.py
diff --git a/youtube_dlc/extractor/imdb.py b/youtube_dlc/extractor/imdb.py
new file mode 100644
index 000000000..a31301985
--- /dev/null
+++ b/youtube_dlc/extractor/imdb.py
@@ -0,0 +1,149 @@
+from __future__ import unicode_literals
+
+import base64
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ mimetype2ext,
+ parse_duration,
+ qualities,
+ try_get,
+ url_or_none,
+)
+
+
+class ImdbIE(InfoExtractor):
+ IE_NAME = 'imdb'
+ IE_DESC = 'Internet Movie Database trailers'
+ _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.imdb.com/video/imdb/vi2524815897',
+ 'info_dict': {
+ 'id': '2524815897',
+ 'ext': 'mp4',
+ 'title': 'No. 2',
+ 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7',
+ 'duration': 152,
+ }
+ }, {
+ 'url': 'http://www.imdb.com/video/_/vi2524815897',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/title/tt1667889/?ref_=ext_shr_eml_vi#lb-vi2524815897',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/title/tt1667889/#lb-vi2524815897',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/videoplayer/vi1562949145',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.imdb.com/title/tt4218696/videoplayer/vi2608641561',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.imdb.com/list/ls009921623/videoplayer/vi260482329',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
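+ # The playback endpoint expects its request descriptor as
+ # base64-encoded JSON in the 'key' query parameter.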
+ data = self._download_json(
+ 'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id,
+ query={
+ 'key': base64.b64encode(json.dumps({
+ 'type': 'VIDEO_PLAYER',
+ 'subType': 'FORCE_LEGACY',
+ 'id': 'vi%s' % video_id,
+ }).encode()).decode(),
+ })[0]
+
+ quality = qualities(('SD', '480p', '720p', '1080p'))
+ formats = []
+ for encoding in data['videoLegacyEncodings']:
+ if not encoding or not isinstance(encoding, dict):
+ continue
+ video_url = url_or_none(encoding.get('url'))
+ if not video_url:
+ continue
+ ext = mimetype2ext(encoding.get(
+ 'mimeType')) or determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ preference=1, m3u8_id='hls', fatal=False))
+ continue
+ format_id = encoding.get('definition')
+ formats.append({
+ 'format_id': format_id,
+ 'url': video_url,
+ 'ext': ext,
+ 'quality': quality(format_id),
+ })
+ self._sort_formats(formats)
+
+ webpage = self._download_webpage(
+ 'https://www.imdb.com/video/vi' + video_id, video_id)
+ video_metadata = self._parse_json(self._search_regex(
+ r'args\.push\(\s*({.+?})\s*\)\s*;', webpage,
+ 'video metadata'), video_id)
+
+ video_info = video_metadata.get('VIDEO_INFO')
+ if video_info and isinstance(video_info, dict):
+ info = try_get(
+ video_info, lambda x: x[list(video_info.keys())[0]][0], dict)
+ else:
+ info = {}
+
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage) or self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'title',
+ default=None) or info['videoTitle']
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': info.get('videoSubTitle'),
+ 'formats': formats,
+ 'description': info.get('videoDescription'),
+ 'thumbnail': url_or_none(try_get(
+ video_metadata, lambda x: x['videoSlate']['source'])),
+ 'duration': parse_duration(info.get('videoRuntime')),
+ }
+
+
+class ImdbListIE(InfoExtractor):
+ IE_NAME = 'imdb:list'
+ IE_DESC = 'Internet Movie Database lists'
+ _VALID_URL = r'https?://(?:www\.)?imdb\.com/list/ls(?P<id>\d{9})(?!/videoplayer/vi\d+)'
+ _TEST = {
+ 'url': 'https://www.imdb.com/list/ls009921623/',
+ 'info_dict': {
+ 'id': '009921623',
+ 'title': 'The Bourne Legacy',
+ 'description': 'A list of trailers, clips, and more from The Bourne Legacy, starring Jeremy Renner and Rachel Weisz.',
+ },
+ 'playlist_count': 8,
+ }
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+ webpage = self._download_webpage(url, list_id)
+ entries = [
+ self.url_result('http://www.imdb.com' + m, 'Imdb')
+ for m in re.findall(r'href="(/list/ls%s/videoplayer/vi[^"]+)"' % list_id, webpage)]
+
+ list_title = self._html_search_regex(
+ r'<h1[^>]+class="[^"]*header[^"]*"[^>]*>(.*?)</h1>',
+ webpage, 'list title')
+ list_description = self._html_search_regex(
+ r'<div[^>]+class="[^"]*list-description[^"]*"[^>]*><p>(.*?)</p>',
+ webpage, 'list description')
+
+ return self.playlist_result(entries, list_id, list_title, list_description)
diff --git a/youtube_dlc/extractor/imggaming.py b/youtube_dlc/extractor/imggaming.py
new file mode 100644
index 000000000..e11f92053
--- /dev/null
+++ b/youtube_dlc/extractor/imggaming.py
@@ -0,0 +1,135 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+)
+
+
+class ImgGamingBaseIE(InfoExtractor):
+ _API_BASE = 'https://dce-frontoffice.imggaming.com/api/v2/'
+ _API_KEY = '857a1e5d-e35e-4fdf-805b-a87b6f8364bf'
+ _HEADERS = None
+ _MANIFEST_HEADERS = {'Accept-Encoding': 'identity'}
+ _REALM = None
+ _VALID_URL_TEMPL = r'https?://(?P<domain>%s)/(?P<type>live|playlist|video)/(?P<id>\d+)(?:\?.*?\bplaylistId=(?P<playlist_id>\d+))?'
+
+ def _real_initialize(self):
+ self._HEADERS = {
+ 'Realm': 'dce.' + self._REALM,
+ 'x-api-key': self._API_KEY,
+ }
+
+ email, password = self._get_login_info()
+ if email is None:
+ self.raise_login_required()
+
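+ # Exchange the credentials for a bearer token; every subsequent API
+ # call sends it in the Authorization header.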
+ p_headers = self._HEADERS.copy()
+ p_headers['Content-Type'] = 'application/json'
+ self._HEADERS['Authorization'] = 'Bearer ' + self._download_json(
+ self._API_BASE + 'login',
+ None, 'Logging in', data=json.dumps({
+ 'id': email,
+ 'secret': password,
+ }).encode(), headers=p_headers)['authorisationToken']
+
+ def _call_api(self, path, media_id):
+ return self._download_json(
+ self._API_BASE + path + media_id, media_id, headers=self._HEADERS)
+
+ def _extract_dve_api_url(self, media_id, media_type):
+ stream_path = 'stream'
+ if media_type == 'video':
+ stream_path += '/vod/'
+ else:
+ stream_path += '?eventId='
+ try:
+ return self._call_api(
+ stream_path, media_id)['playerUrlCallback']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ raise ExtractorError(
+ self._parse_json(e.cause.read().decode(), media_id)['messages'][0],
+ expected=True)
+ raise
+
+ def _real_extract(self, url):
+ domain, media_type, media_id, playlist_id = re.match(self._VALID_URL, url).groups()
+
+ if playlist_id:
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % media_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % playlist_id)
+ media_type, media_id = 'playlist', playlist_id
+
+ if media_type == 'playlist':
+ playlist = self._call_api('vod/playlist/', media_id)
+ entries = []
+ for video in try_get(playlist, lambda x: x['videos']['vods']) or []:
+ video_id = str_or_none(video.get('id'))
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ 'https://%s/video/%s' % (domain, video_id),
+ self.ie_key(), video_id))
+ return self.playlist_result(
+ entries, media_id, playlist.get('title'),
+ playlist.get('description'))
+
+ dve_api_url = self._extract_dve_api_url(media_id, media_type)
+ video_data = self._download_json(dve_api_url, media_id)
+ is_live = media_type == 'live'
+ if is_live:
+ title = self._live_title(self._call_api('event/', media_id)['title'])
+ else:
+ title = video_data['name']
+
+ formats = []
+ for proto in ('hls', 'dash'):
+ media_url = video_data.get(proto + 'Url') or try_get(video_data, lambda x: x[proto]['url'])
+ if not media_url:
+ continue
+ if proto == 'hls':
+ m3u8_formats = self._extract_m3u8_formats(
+ media_url, media_id, 'mp4', 'm3u8' if is_live else 'm3u8_native',
+ m3u8_id='hls', fatal=False, headers=self._MANIFEST_HEADERS)
+ for f in m3u8_formats:
+ f.setdefault('http_headers', {}).update(self._MANIFEST_HEADERS)
+ formats.append(f)
+ else:
+ formats.extend(self._extract_mpd_formats(
+ media_url, media_id, mpd_id='dash', fatal=False,
+ headers=self._MANIFEST_HEADERS))
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for subtitle in video_data.get('subtitles', []):
+ subtitle_url = subtitle.get('url')
+ if not subtitle_url:
+ continue
+ subtitles.setdefault(subtitle.get('lang', 'en_US'), []).append({
+ 'url': subtitle_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': video_data.get('thumbnailUrl'),
+ 'description': video_data.get('description'),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'tags': video_data.get('tags'),
+ 'is_live': is_live,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dlc/extractor/imgur.py b/youtube_dlc/extractor/imgur.py
new file mode 100644
index 000000000..4dc7b0b5c
--- /dev/null
+++ b/youtube_dlc/extractor/imgur.py
@@ -0,0 +1,155 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ ExtractorError,
+)
+
+
+class ImgurIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?!(?:a|gallery|(?:t(?:opic)?|r)/[^/]+)/)(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'https://i.imgur.com/A61SaA1.gifv',
+ 'info_dict': {
+ 'id': 'A61SaA1',
+ 'ext': 'mp4',
+ 'title': 're:Imgur GIF$|MRW gifv is up and running without any bugs$',
+ },
+ }, {
+ 'url': 'https://imgur.com/A61SaA1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://i.imgur.com/crGpqCV.mp4',
+ 'only_matching': True,
+ }, {
+ # no title
+ 'url': 'https://i.imgur.com/jxBXAMC.gifv',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'https://i.imgur.com/{id}.gifv'.format(id=video_id), video_id)
+
+ width = int_or_none(self._og_search_property(
+ 'video:width', webpage, default=None))
+ height = int_or_none(self._og_search_property(
+ 'video:height', webpage, default=None))
+
+ video_elements = self._search_regex(
+ r'(?s)<div class="video-elements">(.*?)</div>',
+ webpage, 'video elements', default=None)
+ if not video_elements:
+ raise ExtractorError(
+ 'No sources found for video %s. Maybe an image?' % video_id,
+ expected=True)
+
+ formats = []
+ for m in re.finditer(r'<source\s+src="(?P<src>[^"]+)"\s+type="(?P<type>[^"]+)"', video_elements):
+ formats.append({
+ 'format_id': m.group('type').partition('/')[2],
+ 'url': self._proto_relative_url(m.group('src')),
+ 'ext': mimetype2ext(m.group('type')),
+ 'width': width,
+ 'height': height,
+ 'http_headers': {
+ 'User-Agent': 'youtube-dlc (like wget)',
+ },
+ })
+
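+ # The page may also expose the original GIF via a 'videoItem' JS object; keep it as a low-preference format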
+ gif_json = self._search_regex(
+ r'(?s)var\s+videoItem\s*=\s*(\{.*?\})',
+ webpage, 'GIF code', fatal=False)
+ if gif_json:
+ gifd = self._parse_json(
+ gif_json, video_id, transform_source=js_to_json)
+ formats.append({
+ 'format_id': 'gif',
+ 'preference': -10,
+ 'width': width,
+ 'height': height,
+ 'ext': 'gif',
+ 'acodec': 'none',
+ 'vcodec': 'gif',
+ 'container': 'gif',
+ 'url': self._proto_relative_url(gifd['gifUrl']),
+ 'filesize': gifd.get('size'),
+ 'http_headers': {
+ 'User-Agent': 'youtube-dlc (like wget)',
+ },
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': self._og_search_title(webpage, default=video_id),
+ }
+
+
+class ImgurGalleryIE(InfoExtractor):
+ IE_NAME = 'imgur:gallery'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?:gallery|(?:t(?:opic)?|r)/[^/]+)/(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://imgur.com/gallery/Q95ko',
+ 'info_dict': {
+ 'id': 'Q95ko',
+ 'title': 'Adding faces make every GIF better',
+ },
+ 'playlist_count': 25,
+ }, {
+ 'url': 'http://imgur.com/topic/Aww/ll5Vk',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://imgur.com/gallery/YcAQlkx',
+ 'info_dict': {
+ 'id': 'YcAQlkx',
+ 'ext': 'mp4',
+ 'title': 'Classic Steve Carell gif...cracks me up everytime....damn the repost downvotes....',
+ }
+ }, {
+ 'url': 'http://imgur.com/topic/Funny/N8rOudd',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://imgur.com/r/aww/VQcQPhM',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ gallery_id = self._match_id(url)
+
+ data = self._download_json(
+ 'https://imgur.com/gallery/%s.json' % gallery_id,
+ gallery_id)['data']['image']
+
+ if data.get('is_album'):
+ entries = [
+ self.url_result('http://imgur.com/%s' % image['hash'], ImgurIE.ie_key(), image['hash'])
+ for image in data['album_images']['images'] if image.get('hash')]
+ return self.playlist_result(entries, gallery_id, data.get('title'), data.get('description'))
+
+ return self.url_result('http://imgur.com/%s' % gallery_id, ImgurIE.ie_key(), gallery_id)
+
+
+class ImgurAlbumIE(ImgurGalleryIE):
+ IE_NAME = 'imgur:album'
+ _VALID_URL = r'https?://(?:i\.)?imgur\.com/a/(?P<id>[a-zA-Z0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://imgur.com/a/j6Orj',
+ 'info_dict': {
+ 'id': 'j6Orj',
+ 'title': 'A Literary Analysis of "Star Wars: The Force Awakens"',
+ },
+ 'playlist_count': 12,
+ }]
diff --git a/youtube_dl/extractor/ina.py b/youtube_dlc/extractor/ina.py
index 12695af27..12695af27 100644
--- a/youtube_dl/extractor/ina.py
+++ b/youtube_dlc/extractor/ina.py
diff --git a/youtube_dl/extractor/inc.py b/youtube_dlc/extractor/inc.py
index d5b258a0f..d5b258a0f 100644
--- a/youtube_dl/extractor/inc.py
+++ b/youtube_dlc/extractor/inc.py
diff --git a/youtube_dlc/extractor/indavideo.py b/youtube_dlc/extractor/indavideo.py
new file mode 100644
index 000000000..4c16243ec
--- /dev/null
+++ b/youtube_dlc/extractor/indavideo.py
@@ -0,0 +1,129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
+ update_url_query,
+)
+
+
+class IndavideoEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:embed\.)?indavideo\.hu/player/video/|assets\.indavideo\.hu/swf/player\.swf\?.*\b(?:v(?:ID|id))=)(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'http://indavideo.hu/player/video/1bdc3c6d80/',
+ 'md5': 'c8a507a1c7410685f83a06eaeeaafeab',
+ 'info_dict': {
+ 'id': '1837039',
+ 'ext': 'mp4',
+ 'title': 'Cicatánc',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'cukiajanlo',
+ 'uploader_id': '83729',
+ 'timestamp': 1439193826,
+ 'upload_date': '20150810',
+ 'duration': 72,
+ 'age_limit': 0,
+ 'tags': ['tánc', 'cica', 'cuki', 'cukiajanlo', 'newsroom'],
+ },
+ }, {
+ 'url': 'http://embed.indavideo.hu/player/video/1bdc3c6d80?autostart=1&hide=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://assets.indavideo.hu/swf/player.swf?v=fe25e500&vID=1bdc3c6d80&autostart=1&hide=1&i=1',
+ 'only_matching': True,
+ }]
+
+ # Some example URLs covered by generic extractor:
+ # http://indavideo.hu/video/Vicces_cica_1
+ # http://index.indavideo.hu/video/2015_0728_beregszasz
+ # http://auto.indavideo.hu/video/Sajat_utanfutoban_a_kis_tacsko
+ # http://erotika.indavideo.hu/video/Amator_tini_punci
+ # http://film.indavideo.hu/video/f_hrom_nagymamm_volt
+ # http://palyazat.indavideo.hu/video/Embertelen_dal_Dodgem_egyuttes
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//embed\.indavideo\.hu/player/video/[\da-f]+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://amfphp.indavideo.hu/SYm0json.php/player.playerHandler.getVideoData/%s' % video_id,
+ video_id)['data']
+
+ title = video['title']
+
+ video_urls = []
+
+ video_files = video.get('video_files')
+ if isinstance(video_files, list):
+ video_urls.extend(video_files)
+ elif isinstance(video_files, dict):
+ video_urls.extend(video_files.values())
+
+ video_file = video.get('video_file')
+ if video_file:
+ video_urls.append(video_file)
+ video_urls = list(set(video_urls))
+
+ video_prefix = video_urls[0].rsplit('/', 1)[0]
+
+ for flv_file in video.get('flv_files', []):
+ flv_url = '%s/%s' % (video_prefix, flv_file)
+ if flv_url not in video_urls:
+ video_urls.append(flv_url)
+
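+ # filesh maps video height to an access token that must be appended to each format URL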
+ filesh = video.get('filesh')
+
+ formats = []
+ for video_url in video_urls:
+ height = int_or_none(self._search_regex(
+ r'\.(\d{3,4})\.mp4(?:\?|$)', video_url, 'height', default=None))
+ if filesh:
+ if not height:
+ continue
+ token = filesh.get(compat_str(height))
+ if token is None:
+ continue
+ video_url = update_url_query(video_url, {'token': token})
+ formats.append({
+ 'url': video_url,
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ timestamp = video.get('date')
+ if timestamp:
+ # upload date is in CEST
+ timestamp = parse_iso8601(timestamp + ' +0200', ' ')
+
+ thumbnails = [{
+ 'url': self._proto_relative_url(thumbnail)
+ } for thumbnail in video.get('thumbnails', [])]
+
+ tags = [tag['title'] for tag in video.get('tags') or []]
+
+ return {
+ 'id': video.get('id') or video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnails': thumbnails,
+ 'uploader': video.get('user_name'),
+ 'uploader_id': video.get('user_id'),
+ 'timestamp': timestamp,
+ 'duration': int_or_none(video.get('length')),
+ 'age_limit': parse_age_limit(video.get('age_limit')),
+ 'tags': tags,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/infoq.py b/youtube_dlc/extractor/infoq.py
index 18249cf9b..18249cf9b 100644
--- a/youtube_dl/extractor/infoq.py
+++ b/youtube_dlc/extractor/infoq.py
diff --git a/youtube_dl/extractor/instagram.py b/youtube_dlc/extractor/instagram.py
index b061850a1..b061850a1 100644
--- a/youtube_dl/extractor/instagram.py
+++ b/youtube_dlc/extractor/instagram.py
diff --git a/youtube_dl/extractor/internazionale.py b/youtube_dlc/extractor/internazionale.py
index 676e8e269..676e8e269 100644
--- a/youtube_dl/extractor/internazionale.py
+++ b/youtube_dlc/extractor/internazionale.py
diff --git a/youtube_dlc/extractor/internetvideoarchive.py b/youtube_dlc/extractor/internetvideoarchive.py
new file mode 100644
index 000000000..59b0a90c3
--- /dev/null
+++ b/youtube_dlc/extractor/internetvideoarchive.py
@@ -0,0 +1,65 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+
+
+class InternetVideoArchiveIE(InfoExtractor):
+ _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'
+
+ _TEST = {
+ 'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
+ 'info_dict': {
+ 'id': '194487',
+ 'ext': 'mp4',
+ 'title': 'Kick-Ass 2',
+ 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ @staticmethod
+ def _build_json_url(query):
+ return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
+
+ def _real_extract(self, url):
+ query = compat_parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = query['publishedid'][0]
+ data = self._download_json(
+ 'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
+ video_id, data=json.dumps({
+ 'customerid': query['customerid'][0],
+ 'publishedid': video_id,
+ }).encode())
+ title = data['Title']
+ formats = self._extract_m3u8_formats(
+ data['VideoUrl'], video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ file_url = formats[0]['url']
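+ # For .ism (Smooth Streaming) sources, derive HDS/DASH/MSS manifests by swapping the path suffix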
+ if '.ism/' in file_url:
+ replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url)
+ formats.extend(self._extract_f4m_formats(
+ replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
+ formats.extend(self._extract_mpd_formats(
+ replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': data.get('PosterUrl'),
+ 'description': data.get('Description'),
+ }
diff --git a/youtube_dlc/extractor/iprima.py b/youtube_dlc/extractor/iprima.py
new file mode 100644
index 000000000..53a550c11
--- /dev/null
+++ b/youtube_dlc/extractor/iprima.py
@@ -0,0 +1,149 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ js_to_json,
+)
+
+
+class IPrimaIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+)\.iprima\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _GEO_BYPASS = False
+
+ _TESTS = [{
+ 'url': 'https://prima.iprima.cz/particka/92-epizoda',
+ 'info_dict': {
+ 'id': 'p51388',
+ 'ext': 'mp4',
+ 'title': 'Partička (92)',
+ 'description': 'md5:859d53beae4609e6dd7796413f1b6cac',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ 'url': 'https://cnn.iprima.cz/videa/70-epizoda',
+ 'info_dict': {
+ 'id': 'p681554',
+ 'ext': 'mp4',
+ 'title': 'HLAVNÍ ZPRÁVY 3.5.2020',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 download
+ },
+ }, {
+ 'url': 'http://play.iprima.cz/particka/particka-92',
+ 'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'http://play.iprima.cz/closer-nove-pripady/closer-nove-pripady-iv-1',
+ 'only_matching': True,
+ }, {
+ # iframe api.play-backend.iprima.cz
+ 'url': 'https://prima.iprima.cz/my-little-pony/mapa-znameni-2-2',
+ 'only_matching': True,
+ }, {
+ # iframe prima.iprima.cz
+ 'url': 'https://prima.iprima.cz/porady/jak-se-stavi-sen/rodina-rathousova-praha',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.iprima.cz/filmy/desne-rande',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://zoom.iprima.cz/10-nejvetsich-tajemstvi-zahad/posvatna-mista-a-stavby',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://krimi.iprima.cz/mraz-0/sebevrazdy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://cool.iprima.cz/derava-silnice-nevadi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://love.iprima.cz/laska-az-za-hrob/slib-dany-bratrovi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://autosalon.iprima.cz/motorsport/7-epizoda-1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ self._set_cookie('play.iprima.cz', 'ott_adult_confirmed', '1')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+ r'<h1>([^<]+)', webpage, 'title')
+
+ video_id = self._search_regex(
+ (r'<iframe[^>]+\bsrc=["\'](?:https?:)?//(?:api\.play-backend\.iprima\.cz/prehravac/embedded|prima\.iprima\.cz/[^/]+/[^/]+)\?.*?\bid=(p\d+)',
+ r'data-product="([^"]+)">',
+ r'id=["\']player-(p\d+)"',
+ r'playerId\s*:\s*["\']player-(p\d+)'),
+ webpage, 'real id')
+
+ playerpage = self._download_webpage(
+ 'http://play.iprima.cz/prehravac/init',
+ video_id, note='Downloading player', query={
+ '_infuse': 1,
+ '_ts': round(time.time()),
+ 'productId': video_id,
+ }, headers={'Referer': url})
+
+ formats = []
+
+ def extract_formats(format_url, format_key=None, lang=None):
+ ext = determine_ext(format_url)
+ new_formats = []
+ if format_key == 'hls' or ext == 'm3u8':
+ new_formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ elif format_key == 'dash' or ext == 'mpd':
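+ # NB: the bare return below skips DASH extraction entirely; the MPD branch that follows is unreachable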
+ return
+ new_formats = self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False)
+ if lang:
+ for f in new_formats:
+ if not f.get('language'):
+ f['language'] = lang
+ formats.extend(new_formats)
+
+ options = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:TDIPlayerOptions|playerOptions)\s*=\s*({.+?});\s*\]\]',
+ playerpage, 'player options', default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if options:
+ for key, tracks in options.get('tracks', {}).items():
+ if not isinstance(tracks, list):
+ continue
+ for track in tracks:
+ src = track.get('src')
+ if src:
+ extract_formats(src, key.lower(), track.get('lang'))
+
+ if not formats:
+ for _, src in re.findall(r'src["\']\s*:\s*(["\'])(.+?)\1', playerpage):
+ extract_formats(src)
+
+ if not formats and '>GEO_IP_NOT_ALLOWED<' in playerpage:
+ self.raise_geo_restricted(countries=['CZ'])
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'formats': formats,
+ 'description': self._og_search_description(webpage, default=None),
+ }
diff --git a/youtube_dl/extractor/iqiyi.py b/youtube_dlc/extractor/iqiyi.py
index cd11aa70f..cd11aa70f 100644
--- a/youtube_dl/extractor/iqiyi.py
+++ b/youtube_dlc/extractor/iqiyi.py
diff --git a/youtube_dl/extractor/ir90tv.py b/youtube_dlc/extractor/ir90tv.py
index d5a3f6fa5..d5a3f6fa5 100644
--- a/youtube_dl/extractor/ir90tv.py
+++ b/youtube_dlc/extractor/ir90tv.py
diff --git a/youtube_dl/extractor/itv.py b/youtube_dlc/extractor/itv.py
index ad2f4eca5..ad2f4eca5 100644
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dlc/extractor/itv.py
diff --git a/youtube_dlc/extractor/ivi.py b/youtube_dlc/extractor/ivi.py
new file mode 100644
index 000000000..b9cb5a8e6
--- /dev/null
+++ b/youtube_dlc/extractor/ivi.py
@@ -0,0 +1,273 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+import sys
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+)
+
+
+class IviIE(InfoExtractor):
+ IE_DESC = 'ivi.ru'
+ IE_NAME = 'ivi'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.(?:ru|tv)/(?:watch/(?:[^/]+/)?|video/player\?.*?videoId=)(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _GEO_COUNTRIES = ['RU']
+ _LIGHT_KEY = b'\xf1\x02\x32\xb7\xbc\x5c\x7a\xe8\xf7\x96\xc1\x33\x2b\x27\xa1\x8c'
+ _LIGHT_URL = 'https://api.ivi.ru/light/'
+
+ _TESTS = [
+ # Single movie
+ {
+ 'url': 'http://www.ivi.ru/watch/53141',
+ 'md5': '6ff5be2254e796ed346251d117196cf4',
+ 'info_dict': {
+ 'id': '53141',
+ 'ext': 'mp4',
+ 'title': 'Иван Васильевич меняет профессию',
+ 'description': 'md5:b924063ea1677c8fe343d8a72ac2195f',
+ 'duration': 5498,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'Only works from Russia',
+ },
+ # Serial's series
+ {
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/9549',
+ 'md5': '221f56b35e3ed815fde2df71032f4b3e',
+ 'info_dict': {
+ 'id': '9549',
+ 'ext': 'mp4',
+ 'title': 'Двое из ларца - Дело Гольдберга (1 часть)',
+ 'series': 'Двое из ларца',
+ 'season': 'Сезон 1',
+ 'season_number': 1,
+ 'episode': 'Дело Гольдберга (1 часть)',
+ 'episode_number': 1,
+ 'duration': 2655,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'Only works from Russia',
+ },
+ {
+ # with MP4-HD720 format
+ 'url': 'http://www.ivi.ru/watch/146500',
+ 'md5': 'd63d35cdbfa1ea61a5eafec7cc523e1e',
+ 'info_dict': {
+ 'id': '146500',
+ 'ext': 'mp4',
+ 'title': 'Кукла',
+ 'description': 'md5:ffca9372399976a2d260a407cc74cce6',
+ 'duration': 5599,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'Only works from Russia',
+ },
+ {
+ 'url': 'https://www.ivi.tv/watch/33560/',
+ 'only_matching': True,
+ },
+ ]
+
+ # Sorted by quality
+ _KNOWN_FORMATS = (
+ 'MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi',
+ 'MP4-SHQ', 'MP4-HD720', 'MP4-HD1080')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data = json.dumps({
+ 'method': 'da.content.get',
+ 'params': [
+ video_id, {
+ 'site': 's%d',
+ 'referrer': 'http://www.ivi.ru/watch/%s' % video_id,
+ 'contentid': video_id
+ }
+ ]
+ })
+
+ bundled = hasattr(sys, 'frozen')
+
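+ # Site 353 is the newer signed API (requires pycryptodomex for Blowfish-CMAC); site 183 is the legacy fallback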
+ for site in (353, 183):
+ content_data = (data % site).encode()
+ if site == 353:
+ if bundled:
+ continue
+ try:
+ from Cryptodome.Cipher import Blowfish
+ from Cryptodome.Hash import CMAC
+ pycryptodomex_found = True
+ except ImportError:
+ pycryptodomex_found = False
+ continue
+
+ timestamp = (self._download_json(
+ self._LIGHT_URL, video_id,
+ 'Downloading timestamp JSON', data=json.dumps({
+ 'method': 'da.timestamp.get',
+ 'params': []
+ }).encode(), fatal=False) or {}).get('result')
+ if not timestamp:
+ continue
+
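+ # The request signature is a Blowfish CMAC over the timestamp concatenated with the JSON payload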
+ query = {
+ 'ts': timestamp,
+ 'sign': CMAC.new(self._LIGHT_KEY, timestamp.encode() + content_data, Blowfish).hexdigest(),
+ }
+ else:
+ query = {}
+
+ video_json = self._download_json(
+ self._LIGHT_URL, video_id,
+ 'Downloading video JSON', data=content_data, query=query)
+
+ error = video_json.get('error')
+ if error:
+ origin = error.get('origin')
+ message = error.get('message') or error.get('user_message')
+ extractor_msg = 'Unable to download video %s'
+ if origin == 'NotAllowedForLocation':
+ self.raise_geo_restricted(message, self._GEO_COUNTRIES)
+ elif origin == 'NoRedisValidData':
+ extractor_msg = 'Video %s does not exist'
+ elif site == 353:
+ continue
+ elif bundled:
+ raise ExtractorError(
+ 'This feature does not work from bundled exe. Run youtube-dlc from sources.',
+ expected=True)
+ elif not pycryptodomex_found:
+ raise ExtractorError(
+ 'pycryptodomex not found. Please install it.',
+ expected=True)
+ elif message:
+ extractor_msg += ': ' + message
+ raise ExtractorError(extractor_msg % video_id, expected=True)
+ else:
+ break
+
+ result = video_json['result']
+ title = result['title']
+
+ quality = qualities(self._KNOWN_FORMATS)
+
+ formats = []
+ for f in result.get('files', []):
+ f_url = f.get('url')
+ content_format = f.get('content_format')
+ if not f_url or '-MDRM-' in content_format or '-FPS-' in content_format:
+ continue
+ formats.append({
+ 'url': f_url,
+ 'format_id': content_format,
+ 'quality': quality(content_format),
+ 'filesize': int_or_none(f.get('size_in_bytes')),
+ })
+ self._sort_formats(formats)
+
+ compilation = result.get('compilation')
+ episode = title if compilation else None
+
+ title = '%s - %s' % (compilation, title) if compilation is not None else title
+
+ thumbnails = [{
+ 'url': preview['url'],
+ 'id': preview.get('content_format'),
+ } for preview in result.get('preview', []) if preview.get('url')]
+
+ webpage = self._download_webpage(url, video_id)
+
+ season = self._search_regex(
+ r'<li[^>]+class="season active"[^>]*><a[^>]+>([^<]+)',
+ webpage, 'season', default=None)
+ season_number = int_or_none(self._search_regex(
+ r'<li[^>]+class="season active"[^>]*><a[^>]+data-season(?:-index)?="(\d+)"',
+ webpage, 'season number', default=None))
+
+ episode_number = int_or_none(self._search_regex(
+ r'[^>]+itemprop="episode"[^>]*>\s*<meta[^>]+itemprop="episodeNumber"[^>]+content="(\d+)',
+ webpage, 'episode number', default=None))
+
+ description = self._og_search_description(webpage, default=None) or self._html_search_meta(
+ 'description', webpage, 'description', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'series': compilation,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'thumbnails': thumbnails,
+ 'description': description,
+ 'duration': int_or_none(result.get('duration')),
+ 'formats': formats,
+ }
+
+
+class IviCompilationIE(InfoExtractor):
+ IE_DESC = 'ivi.ru compilations'
+ IE_NAME = 'ivi:compilation'
+ _VALID_URL = r'https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'
+ _TESTS = [{
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa',
+ 'info_dict': {
+ 'id': 'dvoe_iz_lartsa',
+ 'title': 'Двое из ларца (2006 - 2008)',
+ },
+ 'playlist_mincount': 24,
+ }, {
+ 'url': 'http://www.ivi.ru/watch/dvoe_iz_lartsa/season1',
+ 'info_dict': {
+ 'id': 'dvoe_iz_lartsa/season1',
+ 'title': 'Двое из ларца (2006 - 2008) 1 сезон',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ def _extract_entries(self, html, compilation_id):
+ return [
+ self.url_result(
+ 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, serie), IviIE.ie_key())
+ for serie in re.findall(
+ r'<a\b[^>]+\bhref=["\']/watch/%s/(\d+)["\']' % compilation_id, html)]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ compilation_id = mobj.group('compilationid')
+ season_id = mobj.group('seasonid')
+
+ if season_id is not None: # Season link
+ season_page = self._download_webpage(
+ url, compilation_id, 'Downloading season %s web page' % season_id)
+ playlist_id = '%s/season%s' % (compilation_id, season_id)
+ playlist_title = self._html_search_meta('title', season_page, 'title')
+ entries = self._extract_entries(season_page, compilation_id)
+ else: # Compilation link
+ compilation_page = self._download_webpage(url, compilation_id, 'Downloading compilation web page')
+ playlist_id = compilation_id
+ playlist_title = self._html_search_meta('title', compilation_page, 'title')
+ seasons = re.findall(
+ r'<a href="/watch/%s/season(\d+)' % compilation_id, compilation_page)
+ if not seasons: # No seasons in this compilation
+ entries = self._extract_entries(compilation_page, compilation_id)
+ else:
+ entries = []
+ for season_id in seasons:
+ season_page = self._download_webpage(
+ 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, season_id),
+ compilation_id, 'Downloading season %s web page' % season_id)
+ entries.extend(self._extract_entries(season_page, compilation_id))
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dl/extractor/ivideon.py b/youtube_dlc/extractor/ivideon.py
index 3ca824f79..3ca824f79 100644
--- a/youtube_dl/extractor/ivideon.py
+++ b/youtube_dlc/extractor/ivideon.py
diff --git a/youtube_dl/extractor/iwara.py b/youtube_dlc/extractor/iwara.py
index 907d5fc8b..907d5fc8b 100644
--- a/youtube_dl/extractor/iwara.py
+++ b/youtube_dlc/extractor/iwara.py
diff --git a/youtube_dl/extractor/izlesene.py b/youtube_dlc/extractor/izlesene.py
index f8fca6c8f..f8fca6c8f 100644
--- a/youtube_dl/extractor/izlesene.py
+++ b/youtube_dlc/extractor/izlesene.py
diff --git a/youtube_dlc/extractor/jamendo.py b/youtube_dlc/extractor/jamendo.py
new file mode 100644
index 000000000..490efa8fb
--- /dev/null
+++ b/youtube_dlc/extractor/jamendo.py
@@ -0,0 +1,188 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import random
+
+from ..compat import compat_str
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ int_or_none,
+ try_get,
+)
+
+
+class JamendoIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ licensing\.jamendo\.com/[^/]+|
+ (?:www\.)?jamendo\.com
+ )
+ /track/(?P<id>[0-9]+)(?:/(?P<display_id>[^/?#&]+))?
+ '''
+ _TESTS = [{
+ 'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i',
+ 'md5': '6e9e82ed6db98678f171c25a8ed09ffd',
+ 'info_dict': {
+ 'id': '196219',
+ 'display_id': 'stories-from-emona-i',
+ 'ext': 'flac',
+ 'title': 'Maya Filipič - Stories from Emona I',
+ 'artist': 'Maya Filipič',
+ 'track': 'Stories from Emona I',
+ 'duration': 210,
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1217438117,
+ 'upload_date': '20080730',
+ }
+ }, {
+ 'url': 'https://licensing.jamendo.com/en/track/1496667/energetic-rock',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ track_id, display_id = self._VALID_URL_RE.match(url).groups()
+ webpage = self._download_webpage(
+ 'https://www.jamendo.com/track/' + track_id, track_id)
+ models = self._parse_json(self._html_search_regex(
+ r"data-bundled-models='([^']+)",
+ webpage, 'bundled models'), track_id)
+ track = models['track']['models'][0]
+ title = track_name = track['name']
+ get_model = lambda x: try_get(models, lambda y: y[x]['models'][0], dict) or {}
+ artist = get_model('artist')
+ artist_name = artist.get('name')
+ if artist_name:
+ title = '%s - %s' % (artist_name, title)
+ album = get_model('album')
+
+ formats = [{
+ 'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294'
+ % (sub_domain, track_id, format_id),
+ 'format_id': format_id,
+ 'ext': ext,
+ 'quality': quality,
+ } for quality, (format_id, sub_domain, ext) in enumerate((
+ ('mp31', 'mp3l', 'mp3'),
+ ('mp32', 'mp3d', 'mp3'),
+ ('ogg1', 'ogg', 'ogg'),
+ ('flac', 'flac', 'flac'),
+ ))]
+ self._sort_formats(formats)
+
+ urls = []
+ thumbnails = []
+ for _, covers in track.get('cover', {}).items():
+ for cover_id, cover_url in covers.items():
+ if not cover_url or cover_url in urls:
+ continue
+ urls.append(cover_url)
+ size = int_or_none(cover_id.lstrip('size'))
+ thumbnails.append({
+ 'id': cover_id,
+ 'url': cover_url,
+ 'width': size,
+ 'height': size,
+ })
+
+ tags = []
+ for tag in track.get('tags', []):
+ tag_name = tag.get('name')
+ if not tag_name:
+ continue
+ tags.append(tag_name)
+
+ stats = track.get('stats') or {}
+
+ return {
+ 'id': track_id,
+ 'display_id': display_id,
+ 'thumbnails': thumbnails,
+ 'title': title,
+ 'description': track.get('description'),
+ 'duration': int_or_none(track.get('duration')),
+ 'artist': artist_name,
+ 'track': track_name,
+ 'album': album.get('name'),
+ 'formats': formats,
+ 'license': '-'.join(track.get('licenseCC', [])) or None,
+ 'timestamp': int_or_none(track.get('dateCreated')),
+ 'view_count': int_or_none(stats.get('listenedAll')),
+ 'like_count': int_or_none(stats.get('favorited')),
+ 'average_rating': int_or_none(stats.get('averageNote')),
+ 'tags': tags,
+ }
+
+
+class JamendoAlbumIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)'
+ _TEST = {
+ 'url': 'https://www.jamendo.com/album/121486/duck-on-cover',
+ 'info_dict': {
+ 'id': '121486',
+ 'title': 'Duck On Cover',
+ 'description': 'md5:c2920eaeef07d7af5b96d7c64daf1239',
+ },
+ 'playlist': [{
+ 'md5': 'e1a2fcb42bda30dfac990212924149a8',
+ 'info_dict': {
+ 'id': '1032333',
+ 'ext': 'flac',
+ 'title': 'Shearer - Warmachine',
+ 'artist': 'Shearer',
+ 'track': 'Warmachine',
+ 'timestamp': 1368089771,
+ 'upload_date': '20130509',
+ }
+ }, {
+ 'md5': '1f358d7b2f98edfe90fd55dac0799d50',
+ 'info_dict': {
+ 'id': '1032330',
+ 'ext': 'flac',
+ 'title': 'Shearer - Without Your Ghost',
+ 'artist': 'Shearer',
+ 'track': 'Without Your Ghost',
+ 'timestamp': 1368089771,
+ 'upload_date': '20130509',
+ }
+ }],
+ 'params': {
+ 'playlistend': 2
+ }
+ }
+
+ def _call_api(self, resource, resource_id):
+ path = '/api/%ss' % resource
+ rand = compat_str(random.random())
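+ # X-Jam-Call appears to be an anti-scraping token: SHA1 of the API path plus a random salt, with the salt echoed back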
+ return self._download_json(
+ 'https://www.jamendo.com' + path, resource_id, query={
+ 'id[]': resource_id,
+ }, headers={
+ 'X-Jam-Call': '$%s*%s~' % (hashlib.sha1((path + rand).encode()).hexdigest(), rand)
+ })[0]
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ album = self._call_api('album', album_id)
+ album_name = album.get('name')
+
+ entries = []
+ for track in album.get('tracks', []):
+ track_id = track.get('id')
+ if not track_id:
+ continue
+ track_id = compat_str(track_id)
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': 'https://www.jamendo.com/track/' + track_id,
+ 'ie_key': JamendoIE.ie_key(),
+ 'id': track_id,
+ 'album': album_name,
+ })
+
+ return self.playlist_result(
+ entries, album_id, album_name,
+ clean_html(try_get(album, lambda x: x['description']['en'], compat_str)))
diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dlc/extractor/jeuxvideo.py
index e9f4ed738..e9f4ed738 100644
--- a/youtube_dl/extractor/jeuxvideo.py
+++ b/youtube_dlc/extractor/jeuxvideo.py
diff --git a/youtube_dlc/extractor/joj.py b/youtube_dlc/extractor/joj.py
new file mode 100644
index 000000000..637618183
--- /dev/null
+++ b/youtube_dlc/extractor/joj.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ try_get,
+)
+
+
+class JojIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ joj:|
+ https?://media\.joj\.sk/embed/
+ )
+ (?P<id>[^/?#^]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://media.joj.sk/embed/a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'info_dict': {
+ 'id': 'a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'ext': 'mp4',
+ 'title': 'NOVÉ BÝVANIE',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 3118,
+ }
+ }, {
+ 'url': 'https://media.joj.sk/embed/9i1cxv',
+ 'only_matching': True,
+ }, {
+ 'url': 'joj:a388ec4c-6019-4a4a-9312-b1bee194e932',
+ 'only_matching': True,
+ }, {
+ 'url': 'joj:9i1cxv',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//media\.joj\.sk/embed/(?:(?!\1).)+)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://media.joj.sk/embed/%s' % video_id, video_id)
+
+ title = self._search_regex(
+ (r'videoTitle\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<title>(?P<title>[^<]+)'), webpage, 'title',
+ default=None, group='title') or self._og_search_title(webpage)
+
+ bitrates = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?});', webpage, 'bitrates',
+ default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+
+ formats = []
+ for format_url in try_get(bitrates, lambda x: x['mp4'], list) or []:
+ if isinstance(format_url, compat_str):
+ height = self._search_regex(
+ r'(\d+)[pP]\.', format_url, 'height', default=None)
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%sp' % height if height else None,
+ 'height': int_or_none(height),
+ })
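+ # Fall back to the XML playlist service when no MP4 bitrates are found on the page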
+ if not formats:
+ playlist = self._download_xml(
+ 'https://media.joj.sk/services/Video.php?clip=%s' % video_id,
+ video_id)
+ for file_el in playlist.findall('./files/file'):
+ path = file_el.get('path')
+ if not path:
+ continue
+ format_id = file_el.get('id') or file_el.get('label')
+ formats.append({
+ 'url': 'http://n16.joj.sk/storage/%s' % path.replace(
+ 'dat/', '', 1),
+ 'format_id': format_id,
+ 'height': int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id or path, 'height',
+ default=None)),
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ duration = int_or_none(self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/jove.py b/youtube_dlc/extractor/jove.py
index 27e0e37f6..27e0e37f6 100644
--- a/youtube_dl/extractor/jove.py
+++ b/youtube_dlc/extractor/jove.py
diff --git a/youtube_dlc/extractor/jwplatform.py b/youtube_dlc/extractor/jwplatform.py
new file mode 100644
index 000000000..c34b5f5e6
--- /dev/null
+++ b/youtube_dlc/extractor/jwplatform.py
@@ -0,0 +1,47 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import unsmuggle_url
+
+
+class JWPlatformIE(InfoExtractor):
+ _VALID_URL = r'(?:https?://(?:content\.jwplatform|cdn\.jwplayer)\.com/(?:(?:feed|player|thumb|preview)s|jw6|v2/media)/|jwplatform:)(?P<id>[a-zA-Z0-9]{8})'
+ _TESTS = [{
+ 'url': 'http://content.jwplatform.com/players/nPripu9l-ALJ3XQCI.js',
+ 'md5': 'fa8899fa601eb7c83a64e9d568bdf325',
+ 'info_dict': {
+ 'id': 'nPripu9l',
+ 'ext': 'mov',
+ 'title': 'Big Buck Bunny Trailer',
+ 'description': 'Big Buck Bunny is a short animated film by the Blender Institute. It is made using free and open source software.',
+ 'upload_date': '20081127',
+ 'timestamp': 1227796140,
+ }
+ }, {
+ 'url': 'https://cdn.jwplayer.com/players/nPripu9l-ALJ3XQCI.js',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = JWPlatformIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<(?:script|iframe)[^>]+?src=["\']((?:https?:)?//(?:content\.jwplatform|cdn\.jwplayer)\.com/players/[a-zA-Z0-9]{8})',
+ webpage)
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
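+ # An embedding extractor may smuggle geo_countries into the URL to enable geo bypass here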
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+ video_id = self._match_id(url)
+ json_data = self._download_json('https://cdn.jwplayer.com/v2/media/' + video_id, video_id)
+ return self._parse_jwplayer_data(json_data, video_id)
diff --git a/youtube_dlc/extractor/kakao.py b/youtube_dlc/extractor/kakao.py
new file mode 100644
index 000000000..32935bb28
--- /dev/null
+++ b/youtube_dlc/extractor/kakao.py
@@ -0,0 +1,149 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ remove_end,
+ strip_or_none,
+ unified_timestamp,
+ update_url_query,
+)
+
+
+class KakaoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:play-)?tv\.kakao\.com/(?:channel/\d+|embed/player)/cliplink/(?P<id>\d+|[^?#&]+@my)'
+ _API_BASE_TMPL = 'http://tv.kakao.com/api/v1/ft/cliplinks/%s/'
+
+ _TESTS = [{
+ 'url': 'http://tv.kakao.com/channel/2671005/cliplink/301965083',
+ 'md5': '702b2fbdeb51ad82f5c904e8c0766340',
+ 'info_dict': {
+ 'id': '301965083',
+ 'ext': 'mp4',
+ 'title': '乃木坂46 バナナマン 「3期生紹介コーナーが始動!顔高低差GPも!」 『乃木坂工事中』',
+ 'uploader_id': 2671005,
+ 'uploader': '그랑그랑이',
+ 'timestamp': 1488160199,
+ 'upload_date': '20170227',
+ }
+ }, {
+ 'url': 'http://tv.kakao.com/channel/2653210/cliplink/300103180',
+ 'md5': 'a8917742069a4dd442516b86e7d66529',
+ 'info_dict': {
+ 'id': '300103180',
+ 'ext': 'mp4',
+ 'description': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)\r\n\r\n[쇼! 음악중심] 20160611, 507회',
+ 'title': '러블리즈 - Destiny (나의 지구) (Lovelyz - Destiny)',
+ 'uploader_id': 2653210,
+ 'uploader': '쇼! 음악중심',
+ 'timestamp': 1485684628,
+ 'upload_date': '20170129',
+ }
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ display_id = remove_end(video_id, '@my')  # rstrip('@my') would wrongly strip any trailing '@'/'m'/'y' characters
+ api_base = self._API_BASE_TMPL % video_id
+
+ player_header = {
+ 'Referer': update_url_query(
+ 'http://tv.kakao.com/embed/player/cliplink/%s' % video_id, {
+ 'service': 'kakao_tv',
+ 'autoplay': '1',
+ 'profile': 'HIGH',
+ 'wmode': 'transparent',
+ })
+ }
+
+ query = {
+ 'player': 'monet_html5',
+ 'referer': url,
+ 'uuid': '',
+ 'service': 'kakao_tv',
+ 'section': '',
+ 'dteType': 'PC',
+ 'fields': ','.join([
+ '-*', 'tid', 'clipLink', 'displayTitle', 'clip', 'title',
+ 'description', 'channelId', 'createTime', 'duration', 'playCount',
+ 'likeCount', 'commentCount', 'tagList', 'channel', 'name',
+ 'clipChapterThumbnailList', 'thumbnailUrl', 'timeInSec', 'isDefault',
+ 'videoOutputList', 'width', 'height', 'kbps', 'profile', 'label'])
+ }
+
+ impress = self._download_json(
+ api_base + 'impress', display_id, 'Downloading video info',
+ query=query, headers=player_header)
+
+ clip_link = impress['clipLink']
+ clip = clip_link['clip']
+
+ title = clip.get('title') or clip_link.get('displayTitle')
+
+ query['tid'] = impress.get('tid', '')
+
+ formats = []
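+ # Each quality profile needs its own raw/videolocation request, which returns only the stream URL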
+ for fmt in clip.get('videoOutputList', []):
+ try:
+ profile_name = fmt['profile']
+ if profile_name == 'AUDIO':
+ continue
+ query.update({
+ 'profile': profile_name,
+ 'fields': '-*,url',
+ })
+ fmt_url_json = self._download_json(
+ api_base + 'raw/videolocation', display_id,
+ 'Downloading video URL for profile %s' % profile_name,
+ query=query, headers=player_header, fatal=False)
+
+ if fmt_url_json is None:
+ continue
+
+ fmt_url = fmt_url_json['url']
+ formats.append({
+ 'url': fmt_url,
+ 'format_id': profile_name,
+ 'width': int_or_none(fmt.get('width')),
+ 'height': int_or_none(fmt.get('height')),
+ 'format_note': fmt.get('label'),
+ 'filesize': int_or_none(fmt.get('filesize')),
+ 'tbr': int_or_none(fmt.get('kbps')),
+ })
+ except KeyError:
+ pass
+ self._sort_formats(formats)
+
+ thumbs = []
+ for thumb in clip.get('clipChapterThumbnailList', []):
+ thumbs.append({
+ 'url': thumb.get('thumbnailUrl'),
+ 'id': compat_str(thumb.get('timeInSec')),
+ 'preference': -1 if thumb.get('isDefault') else 0
+ })
+ top_thumbnail = clip.get('thumbnailUrl')
+ if top_thumbnail:
+ thumbs.append({
+ 'url': top_thumbnail,
+ 'preference': 10,
+ })
+
+ return {
+ 'id': display_id,
+ 'title': title,
+ 'description': strip_or_none(clip.get('description')),
+ 'uploader': clip_link.get('channel', {}).get('name'),
+ 'uploader_id': clip_link.get('channelId'),
+ 'thumbnails': thumbs,
+ 'timestamp': unified_timestamp(clip_link.get('createTime')),
+ 'duration': int_or_none(clip.get('duration')),
+ 'view_count': int_or_none(clip.get('playCount')),
+ 'like_count': int_or_none(clip.get('likeCount')),
+ 'comment_count': int_or_none(clip.get('commentCount')),
+ 'formats': formats,
+ 'tags': clip.get('tagList'),
+ }
diff --git a/youtube_dlc/extractor/kaltura.py b/youtube_dlc/extractor/kaltura.py
new file mode 100644
index 000000000..49d13460d
--- /dev/null
+++ b/youtube_dlc/extractor/kaltura.py
@@ -0,0 +1,379 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import base64
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_urlparse,
+ compat_parse_qs,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ unsmuggle_url,
+ smuggle_url,
+)
+
+
+class KalturaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ kaltura:(?P<partner_id>\d+):(?P<id>[0-9a-z_]+)|
+ https?://
+ (?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com(?::\d+)?/
+ (?:
+ (?:
+ # flash player
+ index\.php/(?:kwidget|extwidget/preview)|
+ # html5 player
+ html5/html5lib/[^/]+/mwEmbedFrame\.php
+ )
+ )(?:/(?P<path>[^?]+))?(?:\?(?P<query>.*))?
+ )
+ '''
+ _SERVICE_URL = 'http://cdnapi.kaltura.com'
+ _SERVICE_BASE = '/api_v3/index.php'
+ # See https://github.com/kaltura/server/blob/master/plugins/content/caption/base/lib/model/enums/CaptionType.php
+ _CAPTION_TYPES = {
+ 1: 'srt',
+ 2: 'ttml',
+ 3: 'vtt',
+ }
+ _TESTS = [
+ {
+ 'url': 'kaltura:269692:1_1jc2y3e4',
+ 'md5': '3adcbdb3dcc02d647539e53f284ba171',
+ 'info_dict': {
+ 'id': '1_1jc2y3e4',
+ 'ext': 'mp4',
+ 'title': 'Straight from the Heart',
+ 'upload_date': '20131219',
+ 'uploader_id': 'mlundberg@wolfgangsvault.com',
+ 'description': 'The Allman Brothers Band, 12/16/1981',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'timestamp': int,
+ },
+ },
+ {
+ 'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://cdnapisec.kaltura.com/html5/html5lib/v2.30.2/mwEmbedFrame.php/p/1337/uiconf_id/20540612/entry_id/1_sf5ovm7u?wid=_243342',
+ 'only_matching': True,
+ },
+ {
+ # video with subtitles
+ 'url': 'kaltura:111032:1_cw786r8q',
+ 'only_matching': True,
+ },
+ {
+ # video with ttml subtitles (no fileExt)
+ 'url': 'kaltura:1926081:0_l5ye1133',
+ 'info_dict': {
+ 'id': '0_l5ye1133',
+ 'ext': 'mp4',
+ 'title': 'What Can You Do With Python?',
+ 'upload_date': '20160221',
+ 'uploader_id': 'stork',
+ 'thumbnail': 're:^https?://.*/thumbnail/.*',
+ 'timestamp': int,
+ 'subtitles': {
+ 'en': [{
+ 'ext': 'ttml',
+ }],
+ },
+ },
+ 'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.kaltura.com/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.kaltura.com:443/index.php/extwidget/preview/partner_id/1770401/uiconf_id/37307382/entry_id/0_58u8kme7/embed/iframe?&flashvars[streamerType]=auto',
+ 'only_matching': True,
+ },
+ {
+ # unavailable source format
+ 'url': 'kaltura:513551:1_66x4rg7o',
+ 'only_matching': True,
+ }
+ ]
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = KalturaIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
+ finditer = (
+ re.finditer(
+ r"""(?xs)
+ kWidget\.(?:thumb)?[Ee]mbed\(
+ \{.*?
+ (?P<q1>['"])wid(?P=q1)\s*:\s*
+ (?P<q2>['"])_?(?P<partner_id>(?:(?!(?P=q2)).)+)(?P=q2),.*?
+ (?P<q3>['"])entry_?[Ii]d(?P=q3)\s*:\s*
+ (?P<q4>['"])(?P<id>(?:(?!(?P=q4)).)+)(?P=q4)(?:,|\s*\})
+ """, webpage)
+ or re.finditer(
+ r'''(?xs)
+ (?P<q1>["'])
+ (?:https?:)?//cdnapi(?:sec)?\.kaltura\.com(?::\d+)?/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)(?:(?!(?P=q1)).)*
+ (?P=q1).*?
+ (?:
+ (?:
+ entry_?[Ii]d|
+ (?P<q2>["'])entry_?[Ii]d(?P=q2)
+ )\s*:\s*|
+ \[\s*(?P<q2_1>["'])entry_?[Ii]d(?P=q2_1)\s*\]\s*=\s*
+ )
+ (?P<q3>["'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
+ ''', webpage)
+ or re.finditer(
+ r'''(?xs)
+ <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+ (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+ (?:(?!(?P=q1)).)*
+ [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+ (?:(?!(?P=q1)).)*
+ (?P=q1)
+ ''', webpage)
+ )
+ urls = []
+ for mobj in finditer:
+ embed_info = mobj.groupdict()
+ for k, v in embed_info.items():
+ if v:
+ embed_info[k] = v.strip()
+ url = 'kaltura:%(partner_id)s:%(id)s' % embed_info
+ escaped_pid = re.escape(embed_info['partner_id'])
+ service_mobj = re.search(
+ r'<script[^>]+src=(["\'])(?P<id>(?:https?:)?//(?:(?!\1).)+)/p/%s/sp/%s00/embedIframeJs' % (escaped_pid, escaped_pid),
+ webpage)
+ if service_mobj:
+ url = smuggle_url(url, {'service_url': service_mobj.group('id')})
+ urls.append(url)
+ return urls
+
+ def _kaltura_api_call(self, video_id, actions, service_url=None, *args, **kwargs):
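+ # Kaltura multirequest: the first action holds the shared params; each following action's keys are flattened to '<index>:<key>'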
+ params = actions[0]
+ if len(actions) > 1:
+ for i, a in enumerate(actions[1:], start=1):
+ for k, v in a.items():
+ params['%d:%s' % (i, k)] = v
+
+ data = self._download_json(
+ (service_url or self._SERVICE_URL) + self._SERVICE_BASE,
+ video_id, query=params, *args, **kwargs)
+
+ status = data if len(actions) == 1 else data[0]
+ if status.get('objectType') == 'KalturaAPIException':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, status['message']))
+
+ return data
+
+ def _get_video_info(self, video_id, partner_id, service_url=None):
+ actions = [
+ {
+ 'action': 'null',
+ 'apiVersion': '3.1.5',
+ 'clientTag': 'kdp:v3.8.5',
+ 'format': 1, # JSON, 2 = XML, 3 = PHP
+ 'service': 'multirequest',
+ },
+ {
+ 'expiry': 86400,
+ 'service': 'session',
+ 'action': 'startWidgetSession',
+ 'widgetId': '_%s' % partner_id,
+ },
+ {
+ 'action': 'get',
+ 'entryId': video_id,
+ 'service': 'baseentry',
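+ # '{1:result:ks}' back-references the session token returned by action 1 of the multirequest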
+ 'ks': '{1:result:ks}',
+ 'responseProfile:fields': 'createdAt,dataUrl,duration,name,plays,thumbnailUrl,userId',
+ 'responseProfile:type': 1,
+ },
+ {
+ 'action': 'getbyentryid',
+ 'entryId': video_id,
+ 'service': 'flavorAsset',
+ 'ks': '{1:result:ks}',
+ },
+ {
+ 'action': 'list',
+ 'filter:entryIdEqual': video_id,
+ 'service': 'caption_captionasset',
+ 'ks': '{1:result:ks}',
+ },
+ ]
+ return self._kaltura_api_call(
+ video_id, actions, service_url, note='Downloading video info JSON')
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ mobj = re.match(self._VALID_URL, url)
+ partner_id, entry_id = mobj.group('partner_id', 'id')
+ ks = None
+ captions = None
+ if partner_id and entry_id:
+ _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id, smuggled_data.get('service_url'))
+ else:
+ path, query = mobj.group('path', 'query')
+ if not path and not query:
+ raise ExtractorError('Invalid URL', expected=True)
+ params = {}
+ if query:
+ params = compat_parse_qs(query)
+ if path:
+ splitted_path = path.split('/')
+ params.update(dict(zip(splitted_path[::2], [[v] for v in splitted_path[1::2]])))
+ if 'wid' in params:
+ partner_id = params['wid'][0][1:]
+ elif 'p' in params:
+ partner_id = params['p'][0]
+ elif 'partner_id' in params:
+ partner_id = params['partner_id'][0]
+ else:
+ raise ExtractorError('Invalid URL', expected=True)
+ if 'entry_id' in params:
+ entry_id = params['entry_id'][0]
+ _, info, flavor_assets, captions = self._get_video_info(entry_id, partner_id)
+ elif 'uiconf_id' in params and 'flashvars[referenceId]' in params:
+ reference_id = params['flashvars[referenceId]'][0]
+ webpage = self._download_webpage(url, reference_id)
+ entry_data = self._parse_json(self._search_regex(
+ r'window\.kalturaIframePackageData\s*=\s*({.*});',
+ webpage, 'kalturaIframePackageData'),
+ reference_id)['entryResult']
+ info, flavor_assets = entry_data['meta'], entry_data['contextData']['flavorAssets']
+ entry_id = info['id']
+ # Unfortunately, data returned in kalturaIframePackageData lacks
+ # captions so we will try requesting the complete data using
+ # regular approach since we now know the entry_id
+ try:
+ _, info, flavor_assets, captions = self._get_video_info(
+ entry_id, partner_id)
+ except ExtractorError:
+ # Regular scenario failed but we already have everything
+ # extracted apart from captions and can process at least
+ # with this
+ pass
+ else:
+ raise ExtractorError('Invalid URL', expected=True)
+ ks = params.get('flashvars[ks]', [None])[0]
+
+ source_url = smuggled_data.get('source_url')
+ if source_url:
+ referrer = base64.b64encode(
+ '://'.join(compat_urlparse.urlparse(source_url)[:2])
+ .encode('utf-8')).decode('utf-8')
+ else:
+ referrer = None
+
+ def sign_url(unsigned_url):
+ if ks:
+ unsigned_url += '/ks/%s' % ks
+ if referrer:
+ unsigned_url += '?referrer=%s' % referrer
+ return unsigned_url
+
+ data_url = info['dataUrl']
+ if '/flvclipper/' in data_url:
+ data_url = re.sub(r'/flvclipper/.*', '/serveFlavor', data_url)
+
+ formats = []
+ for f in flavor_assets:
+ # Continue if asset is not ready
+ if f.get('status') != 2:
+ continue
+ # Original format that's not available (e.g. kaltura:1926081:0_c03e1b5g)
+ # skip for now.
+ if f.get('fileExt') == 'chun':
+ continue
+ # DRM-protected video, cannot be decrypted
+ if f.get('fileExt') == 'wvm':
+ continue
+ if not f.get('fileExt'):
+ # QT indicates QuickTime; some videos have broken fileExt
+ if f.get('containerFormat') == 'qt':
+ f['fileExt'] = 'mov'
+ else:
+ f['fileExt'] = 'mp4'
+ video_url = sign_url(
+ '%s/flavorId/%s' % (data_url, f['id']))
+ format_id = '%(fileExt)s-%(bitrate)s' % f
+ # Source format may not be available (e.g. kaltura:513551:1_66x4rg7o)
+ if f.get('isOriginal') is True and not self._is_valid_url(
+ video_url, entry_id, format_id):
+ continue
+ # audio-only has no videoCodecId (e.g. kaltura:1926081:0_c03e1b5g
+ # -f mp4-56)
+ vcodec = 'none' if 'videoCodecId' not in f and f.get(
+ 'frameRate') == 0 else f.get('videoCodecId')
+ formats.append({
+ 'format_id': format_id,
+ 'ext': f.get('fileExt'),
+ 'tbr': int_or_none(f['bitrate']),
+ 'fps': int_or_none(f.get('frameRate')),
+ 'filesize_approx': int_or_none(f.get('size'), invscale=1024),
+ 'container': f.get('containerFormat'),
+ 'vcodec': vcodec,
+ 'height': int_or_none(f.get('height')),
+ 'width': int_or_none(f.get('width')),
+ 'url': video_url,
+ })
+ if '/playManifest/' in data_url:
+ m3u8_url = sign_url(data_url.replace(
+ 'format/url', 'format/applehttp'))
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, entry_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if captions:
+ for caption in captions.get('objects', []):
+ # Continue if caption is not ready
+ if caption.get('status') != 2:
+ continue
+ if not caption.get('id'):
+ continue
+ caption_format = int_or_none(caption.get('format'))
+ subtitles.setdefault(caption.get('languageCode') or caption.get('language'), []).append({
+ 'url': '%s/api_v3/service/caption_captionasset/action/serve/captionAssetId/%s' % (self._SERVICE_URL, caption['id']),
+ 'ext': caption.get('fileExt') or self._CAPTION_TYPES.get(caption_format) or 'ttml',
+ })
+
+ return {
+ 'id': entry_id,
+ 'title': info['name'],
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': clean_html(info.get('description')),
+ 'thumbnail': info.get('thumbnailUrl'),
+ 'duration': info.get('duration'),
+ 'timestamp': info.get('createdAt'),
+ 'uploader_id': info.get('userId') if info.get('userId') != 'None' else None,
+ 'view_count': info.get('plays'),
+ }
diff --git a/youtube_dl/extractor/kanalplay.py b/youtube_dlc/extractor/kanalplay.py
index 6c3498c67..6c3498c67 100644
--- a/youtube_dl/extractor/kanalplay.py
+++ b/youtube_dlc/extractor/kanalplay.py
diff --git a/youtube_dl/extractor/kankan.py b/youtube_dlc/extractor/kankan.py
index a677ff447..a677ff447 100644
--- a/youtube_dl/extractor/kankan.py
+++ b/youtube_dlc/extractor/kankan.py
diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dlc/extractor/karaoketv.py
index bfccf89b0..bfccf89b0 100644
--- a/youtube_dl/extractor/karaoketv.py
+++ b/youtube_dlc/extractor/karaoketv.py
diff --git a/youtube_dl/extractor/karrierevideos.py b/youtube_dlc/extractor/karrierevideos.py
index 7b291e0a0..7b291e0a0 100644
--- a/youtube_dl/extractor/karrierevideos.py
+++ b/youtube_dlc/extractor/karrierevideos.py
diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dlc/extractor/keezmovies.py
index c3eb74c17..c3eb74c17 100644
--- a/youtube_dl/extractor/keezmovies.py
+++ b/youtube_dlc/extractor/keezmovies.py
diff --git a/youtube_dl/extractor/ketnet.py b/youtube_dlc/extractor/ketnet.py
index 93a98e1e0..93a98e1e0 100644
--- a/youtube_dl/extractor/ketnet.py
+++ b/youtube_dlc/extractor/ketnet.py
diff --git a/youtube_dl/extractor/khanacademy.py b/youtube_dlc/extractor/khanacademy.py
index 61739efa7..61739efa7 100644
--- a/youtube_dl/extractor/khanacademy.py
+++ b/youtube_dlc/extractor/khanacademy.py
diff --git a/youtube_dl/extractor/kickstarter.py b/youtube_dlc/extractor/kickstarter.py
index d4da8f484..d4da8f484 100644
--- a/youtube_dl/extractor/kickstarter.py
+++ b/youtube_dlc/extractor/kickstarter.py
diff --git a/youtube_dlc/extractor/kinja.py b/youtube_dlc/extractor/kinja.py
new file mode 100644
index 000000000..79e3026d2
--- /dev/null
+++ b/youtube_dlc/extractor/kinja.py
@@ -0,0 +1,222 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+ unescapeHTML,
+ urljoin,
+)
+
+
+class KinjaEmbedIE(InfoExtractor):
+ IE_NAME = 'kinja:embed'
+ _DOMAIN_REGEX = r'''(?:[^.]+\.)?
+ (?:
+ avclub|
+ clickhole|
+ deadspin|
+ gizmodo|
+ jalopnik|
+ jezebel|
+ kinja|
+ kotaku|
+ lifehacker|
+ splinternews|
+ the(?:inventory|onion|root|takeout)
+ )\.com'''
+ _COMMON_REGEX = r'''/
+ (?:
+ ajax/inset|
+ embed/video
+ )/iframe\?.*?\bid='''
+ _VALID_URL = r'''(?x)https?://%s%s
+ (?P<type>
+ fb|
+ imgur|
+ instagram|
+ jwp(?:layer)?-video|
+ kinjavideo|
+ mcp|
+ megaphone|
+ ooyala|
+ soundcloud(?:-playlist)?|
+ tumblr-post|
+ twitch-stream|
+ twitter|
+ ustream-channel|
+ vimeo|
+ vine|
+ youtube-(?:list|video)
+ )-(?P<id>[^&]+)''' % (_DOMAIN_REGEX, _COMMON_REGEX)
+ _TESTS = [{
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=fb-10103303356633621',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=kinjavideo-100313',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=megaphone-PPY1300931075',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=ooyala-xzMXhleDpopuT0u1ijt_qZj3Va-34pEX%2FZTIxYmJjZDM2NWYzZDViZGRiOWJjYzc5',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-128574047',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=soundcloud-playlist-317413750',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=tumblr-post-160130699814-daydreams-at-midnight',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitch-stream-libratus_extra',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=twitter-1068875942473404422',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=ustream-channel-10414700',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vimeo-120153502',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=vine-5BlvV5qqPrD',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-list-BCQ3KyrPjgA/PLE6509247C270A72E',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://kinja.com/ajax/inset/iframe?id=youtube-video-00QyL0AgPAE',
+ 'only_matching': True,
+ }]
+ _JWPLATFORM_PROVIDER = ('cdn.jwplayer.com/v2/media/', 'JWPlatform')
+ _PROVIDER_MAP = {
+ 'fb': ('facebook.com/video.php?v=', 'Facebook'),
+ 'imgur': ('imgur.com/', 'Imgur'),
+ 'instagram': ('instagram.com/p/', 'Instagram'),
+ 'jwplayer-video': _JWPLATFORM_PROVIDER,
+ 'jwp-video': _JWPLATFORM_PROVIDER,
+ 'megaphone': ('player.megaphone.fm/', 'Generic'),
+ 'ooyala': ('player.ooyala.com/player.js?embedCode=', 'Ooyala'),
+ 'soundcloud': ('api.soundcloud.com/tracks/', 'Soundcloud'),
+ 'soundcloud-playlist': ('api.soundcloud.com/playlists/', 'SoundcloudPlaylist'),
+ 'tumblr-post': ('%s.tumblr.com/post/%s', 'Tumblr'),
+ 'twitch-stream': ('twitch.tv/', 'TwitchStream'),
+ 'twitter': ('twitter.com/i/cards/tfw/v1/', 'TwitterCard'),
+ 'ustream-channel': ('ustream.tv/embed/', 'Ustream'),
+ 'vimeo': ('vimeo.com/', 'Vimeo'),
+ 'vine': ('vine.co/v/', 'Vine'),
+ 'youtube-list': ('youtube.com/embed/%s?list=%s', 'YoutubePlaylist'),
+ 'youtube-video': ('youtube.com/embed/', 'Youtube'),
+ }
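+ # Illustrative mapping (comment only): an id like
+ # '160130699814-daydreams-at-midnight' from a 'tumblr-post' embed splits
+ # on the first dash into ('160130699814', 'daydreams-at-midnight') and
+ # fills the '%s.tumblr.com/post/%s' template as
+ # 'daydreams-at-midnight.tumblr.com/post/160130699814'.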
+
+ @staticmethod
+ def _extract_urls(webpage, url):
+ return [urljoin(url, unescapeHTML(mobj.group('url'))) for mobj in re.finditer(
+ r'(?x)<iframe[^>]+?src=(?P<q>["\'])(?P<url>(?:(?:https?:)?//%s)?%s(?:(?!\1).)+)\1' % (KinjaEmbedIE._DOMAIN_REGEX, KinjaEmbedIE._COMMON_REGEX),
+ webpage)]
+
+ def _real_extract(self, url):
+ video_type, video_id = re.match(self._VALID_URL, url).groups()
+
+ provider = self._PROVIDER_MAP.get(video_type)
+ if provider:
+ video_id = compat_urllib_parse_unquote(video_id)
+ if video_type == 'tumblr-post':
+ video_id, blog = video_id.split('-', 1)
+ result_url = provider[0] % (blog, video_id)
+ elif video_type == 'youtube-list':
+ video_id, playlist_id = video_id.split('/')
+ result_url = provider[0] % (video_id, playlist_id)
+ else:
+ if video_type == 'ooyala':
+ video_id = video_id.split('/')[0]
+ result_url = provider[0] + video_id
+ return self.url_result('http://' + result_url, provider[1])
+
+ if video_type == 'kinjavideo':
+ data = self._download_json(
+ 'https://kinja.com/api/core/video/views/videoById',
+ video_id, query={'videoId': video_id})['data']
+ title = data['title']
+
+ formats = []
+ for k in ('signedPlaylist', 'streaming'):
+ m3u8_url = data.get(k + 'Url')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ thumbnail = None
+ poster = data.get('poster') or {}
+ poster_id = poster.get('id')
+ if poster_id:
+ thumbnail = 'https://i.kinja-img.com/gawker-media/image/upload/%s.%s' % (poster_id, poster.get('format') or 'jpg')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(data.get('description')),
+ 'formats': formats,
+ 'tags': data.get('tags'),
+ 'timestamp': int_or_none(try_get(
+ data, lambda x: x['postInfo']['publishTimeMillis']), 1000),
+ 'thumbnail': thumbnail,
+ 'uploader': data.get('network'),
+ }
+ else:
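+ # Remaining ids (the 'mcp' type) resolve through Univision's media
+ # platform, so metadata and signed stream URLs come from its APIs.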
+ video_data = self._download_json(
+ 'https://api.vmh.univision.com/metadata/v1/content/' + video_id,
+ video_id)['videoMetadata']
+ iptc = video_data['photoVideoMetadataIPTC']
+ title = iptc['title']['en']
+ fmg = video_data.get('photoVideoMetadata_fmg') or {}
+ tvss_domain = fmg.get('tvssDomain') or 'https://auth.univision.com'
+ data = self._download_json(
+ tvss_domain + '/api/v3/video-auth/url-signature-tokens',
+ video_id, query={'mcpids': video_id})['data'][0]
+ formats = []
+
+ rendition_url = data.get('renditionUrl')
+ if rendition_url:
+ formats = self._extract_m3u8_formats(
+ rendition_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+
+ fallback_rendition_url = data.get('fallbackRenditionUrl')
+ if fallback_rendition_url:
+ formats.append({
+ 'format_id': 'fallback',
+ 'tbr': int_or_none(self._search_regex(
+ r'_(\d+)\.mp4', fallback_rendition_url,
+ 'bitrate', default=None)),
+ 'url': fallback_rendition_url,
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': try_get(iptc, lambda x: x['cloudinaryLink']['link'], compat_str),
+ 'uploader': fmg.get('network'),
+ 'duration': int_or_none(iptc.get('fileDuration')),
+ 'formats': formats,
+ 'description': try_get(iptc, lambda x: x['description']['en'], compat_str),
+ 'timestamp': parse_iso8601(iptc.get('dateReleased')),
+ }
diff --git a/youtube_dl/extractor/kinopoisk.py b/youtube_dlc/extractor/kinopoisk.py
index 9e8d01f53..9e8d01f53 100644
--- a/youtube_dl/extractor/kinopoisk.py
+++ b/youtube_dlc/extractor/kinopoisk.py
diff --git a/youtube_dl/extractor/konserthusetplay.py b/youtube_dlc/extractor/konserthusetplay.py
index dd42bb2f2..dd42bb2f2 100644
--- a/youtube_dl/extractor/konserthusetplay.py
+++ b/youtube_dlc/extractor/konserthusetplay.py
diff --git a/youtube_dl/extractor/krasview.py b/youtube_dlc/extractor/krasview.py
index d27d052ff..d27d052ff 100644
--- a/youtube_dl/extractor/krasview.py
+++ b/youtube_dlc/extractor/krasview.py
diff --git a/youtube_dl/extractor/ku6.py b/youtube_dlc/extractor/ku6.py
index a574408e5..a574408e5 100644
--- a/youtube_dl/extractor/ku6.py
+++ b/youtube_dlc/extractor/ku6.py
diff --git a/youtube_dl/extractor/kusi.py b/youtube_dlc/extractor/kusi.py
index 6a7e3baa7..6a7e3baa7 100644
--- a/youtube_dl/extractor/kusi.py
+++ b/youtube_dlc/extractor/kusi.py
diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dlc/extractor/kuwo.py
index cc5b2a1c1..cc5b2a1c1 100644
--- a/youtube_dl/extractor/kuwo.py
+++ b/youtube_dlc/extractor/kuwo.py
diff --git a/youtube_dlc/extractor/la7.py b/youtube_dlc/extractor/la7.py
new file mode 100644
index 000000000..c3b4ffa7e
--- /dev/null
+++ b/youtube_dlc/extractor/la7.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ js_to_json,
+ smuggle_url,
+)
+
+
+class LA7IE(InfoExtractor):
+ IE_NAME = 'la7.it'
+ _VALID_URL = r'''(?x)(https?://)?(?:
+ (?:www\.)?la7\.it/([^/]+)/(?:rivedila7|video)/|
+ tg\.la7\.it/repliche-tgla7\?id=
+ )(?P<id>.+)'''
+
+ _TESTS = [{
+ # 'src' is a plain URL
+ 'url': 'http://www.la7.it/crozza/video/inccool8-02-10-2015-163722',
+ 'md5': '8b613ffc0c4bf9b9e377169fc19c214c',
+ 'info_dict': {
+ 'id': '0_42j6wd36',
+ 'ext': 'mp4',
+ 'title': 'Inc.Cool8',
+ 'description': 'Benvenuti nell\'incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico',
+ 'thumbnail': 're:^https?://.*',
+ 'uploader_id': 'kdla7pillole@iltrovatore.it',
+ 'timestamp': 1443814869,
+ 'upload_date': '20151002',
+ },
+ }, {
+ # 'src' is a dictionary
+ 'url': 'http://tg.la7.it/repliche-tgla7?id=189080',
+ 'md5': '6b0d8888d286e39870208dfeceaf456b',
+ 'info_dict': {
+ 'id': '189080',
+ 'ext': 'mp4',
+ 'title': 'TG LA7',
+ },
+ }, {
+ 'url': 'http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ player_data = self._parse_json(
+ self._search_regex(
+ [r'(?s)videoParams\s*=\s*({.+?});', r'videoLa7\(({[^;]+})\);'],
+ webpage, 'player data'),
+ video_id, transform_source=js_to_json)
+
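+ # The page only hosts a Kaltura player, so extraction is delegated
+ # via a 'kaltura:<partner_id>:<entry_id>' URL (103 appears to be
+ # LA7's Kaltura partner id) with the service URL smuggled along.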
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url('kaltura:103:%s' % player_data['vid'], {
+ 'service_url': 'http://nkdam.iltrovatore.it',
+ }),
+ 'id': video_id,
+ 'title': player_data['title'],
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': player_data.get('poster'),
+ 'ie_key': 'Kaltura',
+ }
diff --git a/youtube_dl/extractor/laola1tv.py b/youtube_dlc/extractor/laola1tv.py
index fa217365a..fa217365a 100644
--- a/youtube_dl/extractor/laola1tv.py
+++ b/youtube_dlc/extractor/laola1tv.py
diff --git a/youtube_dl/extractor/lci.py b/youtube_dlc/extractor/lci.py
index 920872f5c..920872f5c 100644
--- a/youtube_dl/extractor/lci.py
+++ b/youtube_dlc/extractor/lci.py
diff --git a/youtube_dl/extractor/lcp.py b/youtube_dlc/extractor/lcp.py
index ade27a99e..ade27a99e 100644
--- a/youtube_dl/extractor/lcp.py
+++ b/youtube_dlc/extractor/lcp.py
diff --git a/youtube_dl/extractor/lecture2go.py b/youtube_dlc/extractor/lecture2go.py
index 81b5d41be..81b5d41be 100644
--- a/youtube_dl/extractor/lecture2go.py
+++ b/youtube_dlc/extractor/lecture2go.py
diff --git a/youtube_dlc/extractor/lecturio.py b/youtube_dlc/extractor/lecturio.py
new file mode 100644
index 000000000..1b2dcef46
--- /dev/null
+++ b/youtube_dlc/extractor/lecturio.py
@@ -0,0 +1,243 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ str_or_none,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LecturioBaseIE(InfoExtractor):
+ _API_BASE_URL = 'https://app.lecturio.com/api/en/latest/html5/'
+ _LOGIN_URL = 'https://app.lecturio.com/en/login'
+ _NETRC_MACHINE = 'lecturio'
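+ # Credentials may be passed via --username/--password or a .netrc
+ # entry, e.g. (illustrative):
+ # machine lecturio login user@example.com password s3cret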
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ # Sets some cookies
+ _, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(url_handle):
+ return self._LOGIN_URL not in url_handle.geturl()
+
+ # Already logged in
+ if is_logged(urlh):
+ return
+
+ login_form = {
+ 'signin[email]': username,
+ 'signin[password]': password,
+ 'signin[remember]': 'on',
+ }
+
+ response, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form))
+
+ # Logged in successfully
+ if is_logged(urlh):
+ return
+
+ errors = self._html_search_regex(
+ r'(?s)<ul[^>]+class=["\']error_list[^>]+>(.+?)</ul>', response,
+ 'errors', default=None)
+ if errors:
+ raise ExtractorError('Unable to log in: %s' % errors, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class LecturioIE(LecturioBaseIE):
+ _VALID_URL = r'''(?x)
+ https://
+ (?:
+ app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))|
+ (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://app.lecturio.com/medical-courses/important-concepts-and-terms-introduction-to-microbiology.lecture#tab/videos',
+ 'md5': '9a42cf1d8282a6311bf7211bbde26fde',
+ 'info_dict': {
+ 'id': '39634',
+ 'ext': 'mp4',
+ 'title': 'Important Concepts and Terms — Introduction to Microbiology',
+ },
+ 'skip': 'Requires lecturio account credentials',
+ }, {
+ 'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://app.lecturio.com/#/lecture/c/6434/39634',
+ 'only_matching': True,
+ }]
+
+ _CC_LANGS = {
+ 'Arabic': 'ar',
+ 'Bulgarian': 'bg',
+ 'German': 'de',
+ 'English': 'en',
+ 'Spanish': 'es',
+ 'Persian': 'fa',
+ 'French': 'fr',
+ 'Japanese': 'ja',
+ 'Polish': 'pl',
+ 'Pashto': 'ps',
+ 'Russian': 'ru',
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ nt = mobj.group('nt') or mobj.group('nt_de')
+ lecture_id = mobj.group('id')
+ display_id = nt or lecture_id
+ api_path = 'lectures/' + lecture_id if lecture_id else 'lecture/' + nt + '.json'
+ video = self._download_json(
+ self._API_BASE_URL + api_path, display_id)
+ title = video['title'].strip()
+ if not lecture_id:
+ pid = video.get('productId') or video.get('uid')
+ if pid:
+ spid = pid.split('_')
+ if spid and len(spid) == 2:
+ lecture_id = spid[1]
+
+ formats = []
+ for format_ in video['content']['media']:
+ if not isinstance(format_, dict):
+ continue
+ file_ = format_.get('file')
+ if not file_:
+ continue
+ ext = determine_ext(file_)
+ if ext == 'smil':
+ # smil contains only broken RTMP formats anyway
+ continue
+ file_url = url_or_none(file_)
+ if not file_url:
+ continue
+ label = str_or_none(format_.get('label'))
+ filesize = int_or_none(format_.get('fileSize'))
+ f = {
+ 'url': file_url,
+ 'format_id': label,
+ 'filesize': float_or_none(filesize, invscale=1000)
+ }
+ if label:
+ mobj = re.match(r'(\d+)p\s*\(([^)]+)\)', label)
+ if mobj:
+ f.update({
+ 'format_id': mobj.group(2),
+ 'height': int(mobj.group(1)),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ automatic_captions = {}
+ captions = video.get('captions') or []
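+ # Caption URLs appear to encode the caption language as '/<lang>_',
+ # with auto-translated tracks carrying the original language as a
+ # second code ('/<lang>_<original_lang>_'); translated tracks are
+ # filed under automatic_captions rather than subtitles.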
+ for cc in captions:
+ cc_url = cc.get('url')
+ if not cc_url:
+ continue
+ cc_label = cc.get('translatedCode')
+ lang = cc.get('languageCode') or self._search_regex(
+ r'/([a-z]{2})_', cc_url, 'lang',
+ default=cc_label.split()[0] if cc_label else 'en')
+ original_lang = self._search_regex(
+ r'/[a-z]{2}_([a-z]{2})_', cc_url, 'original lang',
+ default=None)
+ sub_dict = (automatic_captions
+ if 'auto-translated' in (cc_label or '') or original_lang
+ else subtitles)
+ sub_dict.setdefault(self._CC_LANGS.get(lang, lang), []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'id': lecture_id or nt,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions,
+ }
+
+
+class LecturioCourseIE(LecturioBaseIE):
+ _VALID_URL = r'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))'
+ _TESTS = [{
+ 'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/',
+ 'info_dict': {
+ 'id': 'microbiology-introduction',
+ 'title': 'Microbiology: Introduction',
+ 'description': 'md5:13da8500c25880c6016ae1e6d78c386a',
+ },
+ 'playlist_count': 45,
+ 'skip': 'Requires lecturio account credentials',
+ }, {
+ 'url': 'https://app.lecturio.com/#/course/c/6434',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ nt, course_id = re.match(self._VALID_URL, url).groups()
+ display_id = nt or course_id
+ api_path = 'courses/' + course_id if course_id else 'course/content/' + nt + '.json'
+ course = self._download_json(
+ self._API_BASE_URL + api_path, display_id)
+ entries = []
+ for lecture in course.get('lectures', []):
+ lecture_id = str_or_none(lecture.get('id'))
+ lecture_url = lecture.get('url')
+ if lecture_url:
+ lecture_url = urljoin(url, lecture_url)
+ else:
+ lecture_url = 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id, lecture_id)
+ entries.append(self.url_result(
+ lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+ return self.playlist_result(
+ entries, display_id, course.get('title'),
+ clean_html(course.get('description')))
+
+
+class LecturioDeCourseIE(LecturioBaseIE):
+ _VALID_URL = r'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs'
+ _TEST = {
+ 'url': 'https://www.lecturio.de/jura/grundrechte.kurs',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for mobj in re.finditer(
+ r'(?s)<td[^>]+\bdata-lecture-id=["\'](?P<id>\d+).+?\bhref=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>',
+ webpage):
+ lecture_url = urljoin(url, mobj.group('url'))
+ lecture_id = mobj.group('id')
+ entries.append(self.url_result(
+ lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id))
+
+ title = self._search_regex(
+ r'<h1[^>]*>([^<]+)', webpage, 'title', default=None)
+
+ return self.playlist_result(entries, display_id, title)
diff --git a/youtube_dl/extractor/leeco.py b/youtube_dlc/extractor/leeco.py
index 7dc0ad794..7dc0ad794 100644
--- a/youtube_dl/extractor/leeco.py
+++ b/youtube_dlc/extractor/leeco.py
diff --git a/youtube_dlc/extractor/lego.py b/youtube_dlc/extractor/lego.py
new file mode 100644
index 000000000..1e3c19dfd
--- /dev/null
+++ b/youtube_dlc/extractor/lego.py
@@ -0,0 +1,149 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import uuid
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ qualities,
+)
+
+
+class LEGOIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lego\.com/(?P<locale>[a-z]{2}-[a-z]{2})/(?:[^/]+/)*videos/(?:[^/]+/)*[^/?#]+-(?P<id>[0-9a-f]{32})'
+ _TESTS = [{
+ 'url': 'http://www.lego.com/en-us/videos/themes/club/blocumentary-kawaguchi-55492d823b1b4d5e985787fa8c2973b1',
+ 'md5': 'f34468f176cfd76488767fc162c405fa',
+ 'info_dict': {
+ 'id': '55492d82-3b1b-4d5e-9857-87fa8c2973b1_en-US',
+ 'ext': 'mp4',
+ 'title': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+ 'description': 'Blocumentary Great Creations: Akiyuki Kawaguchi',
+ },
+ }, {
+ # geo-restricted, but the contentUrl contains a valid url
+ 'url': 'http://www.lego.com/nl-nl/videos/themes/nexoknights/episode-20-kingdom-of-heroes-13bdc2299ab24d9685701a915b3d71e7##sp=399',
+ 'md5': 'c7420221f7ffd03ff056f9db7f8d807c',
+ 'info_dict': {
+ 'id': '13bdc229-9ab2-4d96-8570-1a915b3d71e7_nl-NL',
+ 'ext': 'mp4',
+ 'title': 'Aflevering 20: Helden van het koninkrijk',
+ 'description': 'md5:8ee499aac26d7fa8bcb0cedb7f9c3941',
+ 'age_limit': 5,
+ },
+ }, {
+ # with subtitles
+ 'url': 'https://www.lego.com/nl-nl/kids/videos/classic/creative-storytelling-the-little-puppy-aa24f27c7d5242bc86102ebdc0f24cba',
+ 'info_dict': {
+ 'id': 'aa24f27c-7d52-42bc-8610-2ebdc0f24cba_nl-NL',
+ 'ext': 'mp4',
+ 'title': 'De kleine puppy',
+ 'description': 'md5:5b725471f849348ac73f2e12cfb4be06',
+ 'age_limit': 1,
+ 'subtitles': {
+ 'nl': [{
+ 'ext': 'srt',
+ 'url': r're:^https://.+\.srt$',
+ }],
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+ _QUALITIES = {
+ 'Lowest': (64, 180, 320),
+ 'Low': (64, 270, 480),
+ 'Medium': (96, 360, 640),
+ 'High': (128, 540, 960),
+ 'Highest': (128, 720, 1280),
+ }
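+ # Each label maps to (audio bitrate in kbps, height, width): e.g.
+ # 'Medium' is ~96 kbps audio at 640x360; the field meanings are
+ # inferred from the tuple unpacking into 'abr'/'height'/'width' below.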
+
+ def _real_extract(self, url):
+ locale, video_id = re.match(self._VALID_URL, url).groups()
+ countries = [locale.split('-')[1].upper()]
+ self._initialize_geo_bypass({
+ 'countries': countries,
+ })
+
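+ # uuid.UUID() turns the 32-digit hex id from the URL into its canonical
+ # dashed form, e.g. '55492d823b1b4d5e985787fa8c2973b1' ->
+ # '55492d82-3b1b-4d5e-9857-87fa8c2973b1', which is combined with the
+ # locale to build the videoId query parameter.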
+ try:
+ item = self._download_json(
+ # https://contentfeed.services.lego.com/api/v2/item/[VIDEO_ID]?culture=[LOCALE]&contentType=Video
+ 'https://services.slingshot.lego.com/mediaplayer/v2',
+ video_id, query={
+ 'videoId': '%s_%s' % (uuid.UUID(video_id), locale),
+ }, headers=self.geo_verification_headers())
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 451:
+ self.raise_geo_restricted(countries=countries)
+ raise
+
+ video = item['Video']
+ video_id = video['Id']
+ title = video['Title']
+
+ q = qualities(['Lowest', 'Low', 'Medium', 'High', 'Highest'])
+ formats = []
+ for video_source in item.get('VideoFormats', []):
+ video_source_url = video_source.get('Url')
+ if not video_source_url:
+ continue
+ video_source_format = video_source.get('Format')
+ if video_source_format == 'F4M':
+ formats.extend(self._extract_f4m_formats(
+ video_source_url, video_id,
+ f4m_id=video_source_format, fatal=False))
+ elif video_source_format == 'M3U8':
+ formats.extend(self._extract_m3u8_formats(
+ video_source_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=video_source_format, fatal=False))
+ else:
+ video_source_quality = video_source.get('Quality')
+ format_id = []
+ for v in (video_source_format, video_source_quality):
+ if v:
+ format_id.append(v)
+ f = {
+ 'format_id': '-'.join(format_id),
+ 'quality': q(video_source_quality),
+ 'url': video_source_url,
+ }
+ quality = self._QUALITIES.get(video_source_quality)
+ if quality:
+ f.update({
+ 'abr': quality[0],
+ 'height': quality[1],
+ 'width': quality[2],
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ sub_file_id = video.get('SubFileId')
+ if sub_file_id and sub_file_id != '00000000-0000-0000-0000-000000000000':
+ net_storage_path = video.get('NetstoragePath')
+ invariant_id = video.get('InvariantId')
+ video_file_id = video.get('VideoFileId')
+ video_version = video.get('VideoVersion')
+ if net_storage_path and invariant_id and video_file_id and video_version:
+ subtitles.setdefault(locale[:2], []).append({
+ 'url': 'https://lc-mediaplayerns-live-s.legocdn.com/public/%s/%s_%s_%s_%s_sub.srt' % (net_storage_path, invariant_id, video_file_id, locale, video_version),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('Description'),
+ 'thumbnail': video.get('GeneratedCoverImage') or video.get('GeneratedThumbnail'),
+ 'duration': int_or_none(video.get('Length')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'age_limit': int_or_none(video.get('AgeFrom')),
+ 'season': video.get('SeasonTitle'),
+ 'season_number': int_or_none(video.get('Season')) or None,
+ 'episode_number': int_or_none(video.get('Episode')) or None,
+ }
diff --git a/youtube_dl/extractor/lemonde.py b/youtube_dlc/extractor/lemonde.py
index 3306892e8..3306892e8 100644
--- a/youtube_dl/extractor/lemonde.py
+++ b/youtube_dlc/extractor/lemonde.py
diff --git a/youtube_dl/extractor/lenta.py b/youtube_dlc/extractor/lenta.py
index 2ebd4e577..2ebd4e577 100644
--- a/youtube_dl/extractor/lenta.py
+++ b/youtube_dlc/extractor/lenta.py
diff --git a/youtube_dl/extractor/libraryofcongress.py b/youtube_dlc/extractor/libraryofcongress.py
index 03f205144..03f205144 100644
--- a/youtube_dl/extractor/libraryofcongress.py
+++ b/youtube_dlc/extractor/libraryofcongress.py
diff --git a/youtube_dl/extractor/libsyn.py b/youtube_dlc/extractor/libsyn.py
index 2cf444258..2cf444258 100644
--- a/youtube_dl/extractor/libsyn.py
+++ b/youtube_dlc/extractor/libsyn.py
diff --git a/youtube_dl/extractor/lifenews.py b/youtube_dlc/extractor/lifenews.py
index 42e263bfa..42e263bfa 100644
--- a/youtube_dl/extractor/lifenews.py
+++ b/youtube_dlc/extractor/lifenews.py
diff --git a/youtube_dlc/extractor/limelight.py b/youtube_dlc/extractor/limelight.py
new file mode 100644
index 000000000..39f74d282
--- /dev/null
+++ b/youtube_dlc/extractor/limelight.py
@@ -0,0 +1,358 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ smuggle_url,
+ try_get,
+ unsmuggle_url,
+ ExtractorError,
+)
+
+
+class LimelightBaseIE(InfoExtractor):
+ _PLAYLIST_SERVICE_URL = 'http://production-ps.lvp.llnw.net/r/PlaylistService/%s/%s/%s'
+
+ @classmethod
+ def _extract_urls(cls, webpage, source_url):
+ lm = {
+ 'Media': 'media',
+ 'Channel': 'channel',
+ 'ChannelList': 'channel_list',
+ }
+
+ def smuggle(url):
+ return smuggle_url(url, {'source_url': source_url})
+
+ entries = []
+ for kind, video_id in re.findall(
+ r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})',
+ webpage):
+ entries.append(cls.url_result(
+ smuggle('limelight:%s:%s' % (lm[kind], video_id)),
+ 'Limelight%s' % kind, video_id))
+ for mobj in re.finditer(
+ # As per [1], the class attribute should be exactly equal to
+ # 'LimelightEmbeddedPlayerFlash', but numerous examples have been
+ # seen that don't match it exactly (e.g. [2]).
+ # 1. http://support.3playmedia.com/hc/en-us/articles/227732408-Limelight-Embedding-the-Captions-Plugin-with-the-Limelight-Player-on-Your-Webpage
+ # 2. http://www.sedona.com/FacilitatorTraining2017
+ r'''(?sx)
+ <object[^>]+class=(["\'])(?:(?!\1).)*\bLimelightEmbeddedPlayerFlash\b(?:(?!\1).)*\1[^>]*>.*?
+ <param[^>]+
+ name=(["\'])flashVars\2[^>]+
+ value=(["\'])(?:(?!\3).)*(?P<kind>media|channel(?:List)?)Id=(?P<id>[a-z0-9]{32})
+ ''', webpage):
+ kind, video_id = mobj.group('kind'), mobj.group('id')
+ entries.append(cls.url_result(
+ smuggle('limelight:%s:%s' % (kind, video_id)),
+ 'Limelight%s' % kind.capitalize(), video_id))
+ # http://support.3playmedia.com/hc/en-us/articles/115009517327-Limelight-Embedding-the-Audio-Description-Plugin-with-the-Limelight-Player-on-Your-Web-Page
+ for video_id in re.findall(
+ r'(?s)LimelightPlayerUtil\.embed\s*\(\s*{.*?\bmediaId["\']\s*:\s*["\'](?P<id>[a-z0-9]{32})',
+ webpage):
+ entries.append(cls.url_result(
+ smuggle('limelight:media:%s' % video_id),
+ LimelightMediaIE.ie_key(), video_id))
+ return entries
+
+ def _call_playlist_service(self, item_id, method, fatal=True, referer=None):
+ headers = {}
+ if referer:
+ headers['Referer'] = referer
+ try:
+ return self._download_json(
+ self._PLAYLIST_SERVICE_URL % (self._PLAYLIST_SERVICE_PATH, item_id, method),
+ item_id, 'Downloading PlaylistService %s JSON' % method,
+ fatal=fatal, headers=headers)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error = self._parse_json(e.cause.read().decode(), item_id)['detail']['contentAccessPermission']
+ if error == 'CountryDisabled':
+ self.raise_geo_restricted()
+ raise ExtractorError(error, expected=True)
+ raise
+
+ def _extract(self, item_id, pc_method, mobile_method, referer=None):
+ pc = self._call_playlist_service(item_id, pc_method, referer=referer)
+ mobile = self._call_playlist_service(
+ item_id, mobile_method, fatal=False, referer=referer)
+ return pc, mobile
+
+ def _extract_info(self, pc, mobile, i, referer):
+ get_item = lambda x, y: try_get(x, lambda x: x[y][i], dict) or {}
+ pc_item = get_item(pc, 'playlistItems')
+ mobile_item = get_item(mobile, 'mediaList')
+ video_id = pc_item.get('mediaId') or mobile_item['mediaId']
+ title = pc_item.get('title') or mobile_item['title']
+
+ formats = []
+ urls = []
+ for stream in pc_item.get('streams', []):
+ stream_url = stream.get('url')
+ if not stream_url or stream.get('drmProtected') or stream_url in urls:
+ continue
+ urls.append(stream_url)
+ ext = determine_ext(stream_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ stream_url, video_id, f4m_id='hds', fatal=False))
+ else:
+ fmt = {
+ 'url': stream_url,
+ 'abr': float_or_none(stream.get('audioBitRate')),
+ 'fps': float_or_none(stream.get('videoFrameRate')),
+ 'ext': ext,
+ }
+ width = int_or_none(stream.get('videoWidthInPixels'))
+ height = int_or_none(stream.get('videoHeightInPixels'))
+ vbr = float_or_none(stream.get('videoBitRate'))
+ if width or height or vbr:
+ fmt.update({
+ 'width': width,
+ 'height': height,
+ 'vbr': vbr,
+ })
+ else:
+ fmt['vcodec'] = 'none'
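+ # For RTMP streams on a known CDN, a progressive HTTP mirror built
+ # from the play path (minus the 'mp4:' prefix) is probed and added
+ # first; the RTMP format itself is kept as well.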
+ rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', stream_url)
+ if rtmp:
+ format_id = 'rtmp'
+ if stream.get('videoBitRate'):
+ format_id += '-%d' % int_or_none(stream['videoBitRate'])
+ http_format_id = format_id.replace('rtmp', 'http')
+
+ CDN_HOSTS = (
+ ('delvenetworks.com', 'cpl.delvenetworks.com'),
+ ('video.llnw.net', 's2.content.video.llnw.net'),
+ )
+ for cdn_host, http_host in CDN_HOSTS:
+ if cdn_host not in rtmp.group('host').lower():
+ continue
+ http_url = 'http://%s/%s' % (http_host, rtmp.group('playpath')[4:])
+ urls.append(http_url)
+ if self._is_valid_url(http_url, video_id, http_format_id):
+ http_fmt = fmt.copy()
+ http_fmt.update({
+ 'url': http_url,
+ 'format_id': http_format_id,
+ })
+ formats.append(http_fmt)
+ break
+
+ fmt.update({
+ 'url': rtmp.group('url'),
+ 'play_path': rtmp.group('playpath'),
+ 'app': rtmp.group('app'),
+ 'ext': 'flv',
+ 'format_id': format_id,
+ })
+ formats.append(fmt)
+
+ for mobile_url in mobile_item.get('mobileUrls', []):
+ media_url = mobile_url.get('mobileUrl')
+ format_id = mobile_url.get('targetMediaPlatform')
+ if not media_url or format_id in ('Widevine', 'SmoothStreaming') or media_url in urls:
+ continue
+ urls.append(media_url)
+ ext = determine_ext(media_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ media_url, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'url': media_url,
+ 'format_id': format_id,
+ 'preference': -1,
+ 'ext': ext,
+ })
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for flag in mobile_item.get('flags') or []:
+ if flag == 'ClosedCaptions':
+ closed_captions = self._call_playlist_service(
+ video_id, 'getClosedCaptionsDetailsByMediaId',
+ False, referer) or []
+ for cc in closed_captions:
+ cc_url = cc.get('webvttFileUrl')
+ if not cc_url:
+ continue
+ lang = cc.get('languageCode') or self._search_regex(r'/([a-z]{2})\.vtt', cc_url, 'lang', default='en')
+ subtitles.setdefault(lang, []).append({
+ 'url': cc_url,
+ })
+ break
+
+ get_meta = lambda x: pc_item.get(x) or mobile_item.get(x)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': get_meta('description'),
+ 'formats': formats,
+ 'duration': float_or_none(get_meta('durationInMilliseconds'), 1000),
+ 'thumbnail': get_meta('previewImageUrl') or get_meta('thumbnailImageUrl'),
+ 'subtitles': subtitles,
+ }
+
+
+class LimelightMediaIE(LimelightBaseIE):
+ IE_NAME = 'limelight'
+ _VALID_URL = r'''(?x)
+ (?:
+ limelight:media:|
+ https?://
+ (?:
+ link\.videoplatform\.limelight\.com/media/|
+ assets\.delvenetworks\.com/player/loader\.swf
+ )
+ \?.*?\bmediaId=
+ )
+ (?P<id>[a-z0-9]{32})
+ '''
+ _TESTS = [{
+ 'url': 'http://link.videoplatform.limelight.com/media/?mediaId=3ffd040b522b4485b6d84effc750cd86',
+ 'info_dict': {
+ 'id': '3ffd040b522b4485b6d84effc750cd86',
+ 'ext': 'mp4',
+ 'title': 'HaP and the HB Prince Trailer',
+ 'description': 'md5:8005b944181778e313d95c1237ddb640',
+ 'thumbnail': r're:^https?://.*\.jpeg$',
+ 'duration': 144.23,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # video with subtitles
+ 'url': 'limelight:media:a3e00274d4564ec4a9b29b9466432335',
+ 'md5': '2fa3bad9ac321e23860ca23bc2c69e3d',
+ 'info_dict': {
+ 'id': 'a3e00274d4564ec4a9b29b9466432335',
+ 'ext': 'mp4',
+ 'title': '3Play Media Overview Video',
+ 'thumbnail': r're:^https?://.*\.jpeg$',
+ 'duration': 78.101,
+ # TODO: extract all languages that were accessible via API
+ # 'subtitles': 'mincount:9',
+ 'subtitles': 'mincount:1',
+ },
+ }, {
+ 'url': 'https://assets.delvenetworks.com/player/loader.swf?mediaId=8018a574f08d416e95ceaccae4ba0452',
+ 'only_matching': True,
+ }]
+ _PLAYLIST_SERVICE_PATH = 'media'
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ video_id = self._match_id(url)
+ source_url = smuggled_data.get('source_url')
+ self._initialize_geo_bypass({
+ 'countries': smuggled_data.get('geo_countries'),
+ })
+
+ pc, mobile = self._extract(
+ video_id, 'getPlaylistByMediaId',
+ 'getMobilePlaylistByMediaId', source_url)
+
+ return self._extract_info(pc, mobile, 0, source_url)
+
+
+class LimelightChannelIE(LimelightBaseIE):
+ IE_NAME = 'limelight:channel'
+ _VALID_URL = r'''(?x)
+ (?:
+ limelight:channel:|
+ https?://
+ (?:
+ link\.videoplatform\.limelight\.com/media/|
+ assets\.delvenetworks\.com/player/loader\.swf
+ )
+ \?.*?\bchannelId=
+ )
+ (?P<id>[a-z0-9]{32})
+ '''
+ _TESTS = [{
+ 'url': 'http://link.videoplatform.limelight.com/media/?channelId=ab6a524c379342f9b23642917020c082',
+ 'info_dict': {
+ 'id': 'ab6a524c379342f9b23642917020c082',
+ 'title': 'Javascript Sample Code',
+ 'description': 'Javascript Sample Code - http://www.delvenetworks.com/sample-code/playerCode-demo.html',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'http://assets.delvenetworks.com/player/loader.swf?channelId=ab6a524c379342f9b23642917020c082',
+ 'only_matching': True,
+ }]
+ _PLAYLIST_SERVICE_PATH = 'channel'
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ channel_id = self._match_id(url)
+ source_url = smuggled_data.get('source_url')
+
+ pc, mobile = self._extract(
+ channel_id, 'getPlaylistByChannelId',
+ 'getMobilePlaylistWithNItemsByChannelId?begin=0&count=-1',
+ source_url)
+
+ entries = [
+ self._extract_info(pc, mobile, i, source_url)
+ for i in range(len(pc['playlistItems']))]
+
+ return self.playlist_result(
+ entries, channel_id, pc.get('title'), (mobile or {}).get('description'))
+
+
+class LimelightChannelListIE(LimelightBaseIE):
+ IE_NAME = 'limelight:channel_list'
+ _VALID_URL = r'''(?x)
+ (?:
+ limelight:channel_list:|
+ https?://
+ (?:
+ link\.videoplatform\.limelight\.com/media/|
+ assets\.delvenetworks\.com/player/loader\.swf
+ )
+ \?.*?\bchannelListId=
+ )
+ (?P<id>[a-z0-9]{32})
+ '''
+ _TESTS = [{
+ 'url': 'http://link.videoplatform.limelight.com/media/?channelListId=301b117890c4465c8179ede21fd92e2b',
+ 'info_dict': {
+ 'id': '301b117890c4465c8179ede21fd92e2b',
+ 'title': 'Website - Hero Player',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://assets.delvenetworks.com/player/loader.swf?channelListId=301b117890c4465c8179ede21fd92e2b',
+ 'only_matching': True,
+ }]
+ _PLAYLIST_SERVICE_PATH = 'channel_list'
+
+ def _real_extract(self, url):
+ channel_list_id = self._match_id(url)
+
+ channel_list = self._call_playlist_service(
+ channel_list_id, 'getMobileChannelListById')
+
+ entries = [
+ self.url_result('limelight:channel:%s' % channel['id'], 'LimelightChannel')
+ for channel in channel_list['channelList']]
+
+ return self.playlist_result(
+ entries, channel_list_id, channel_list['title'])
diff --git a/youtube_dl/extractor/line.py b/youtube_dlc/extractor/line.py
index 7f5fa446e..7f5fa446e 100644
--- a/youtube_dl/extractor/line.py
+++ b/youtube_dlc/extractor/line.py
diff --git a/youtube_dl/extractor/linkedin.py b/youtube_dlc/extractor/linkedin.py
index 26fc703d1..26fc703d1 100644
--- a/youtube_dl/extractor/linkedin.py
+++ b/youtube_dlc/extractor/linkedin.py
diff --git a/youtube_dlc/extractor/linuxacademy.py b/youtube_dlc/extractor/linuxacademy.py
new file mode 100644
index 000000000..23ca965d9
--- /dev/null
+++ b/youtube_dlc/extractor/linuxacademy.py
@@ -0,0 +1,173 @@
+from __future__ import unicode_literals
+
+import json
+import random
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_HTTPError,
+)
+from ..utils import (
+ ExtractorError,
+ orderedSet,
+ unescapeHTML,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class LinuxAcademyIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?linuxacademy\.com/cp/
+ (?:
+ courses/lesson/course/(?P<chapter_id>\d+)/lesson/(?P<lesson_id>\d+)|
+ modules/view/id/(?P<course_id>\d+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
+ 'info_dict': {
+ 'id': '1498-2',
+ 'ext': 'mp4',
+ 'title': "Introduction to the Practitioner's Brief",
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Linux Academy account credentials',
+ }, {
+ 'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://linuxacademy.com/cp/modules/view/id/154',
+ 'info_dict': {
+ 'id': '154',
+ 'title': 'AWS Certified Cloud Practitioner',
+ 'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
+ },
+ 'playlist_count': 41,
+ 'skip': 'Requires Linux Academy account credentials',
+ }]
+
+ _AUTHORIZE_URL = 'https://login.linuxacademy.com/authorize'
+ _ORIGIN_URL = 'https://linuxacademy.com'
+ _CLIENT_ID = 'KaWxNn1C2Gc7n83W9OFeXltd8Utb5vvx'
+ _NETRC_MACHINE = 'linuxacademy'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ def random_string():
+ return ''.join([
+ random.choice('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-._~')
+ for _ in range(32)])
+
+ webpage, urlh = self._download_webpage_handle(
+ self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
+ 'client_id': self._CLIENT_ID,
+ 'response_type': 'token id_token',
+ 'redirect_uri': self._ORIGIN_URL,
+ 'scope': 'openid email user_impersonation profile',
+ 'audience': self._ORIGIN_URL,
+ 'state': random_string(),
+ 'nonce': random_string(),
+ })
+
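+ # The login endpoints follow an Auth0-style implicit flow (inferred
+ # from the URLs): the authorize page embeds base64-encoded
+ # 'extraParams', and the access token is later read back from the
+ # fragment of the callback redirect URL.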
+ login_data = self._parse_json(
+ self._search_regex(
+ r'atob\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'login info', group='value'), None,
+ transform_source=lambda x: compat_b64decode(x).decode('utf-8')
+ )['extraParams']
+
+ login_data.update({
+ 'client_id': self._CLIENT_ID,
+ 'redirect_uri': self._ORIGIN_URL,
+ 'tenant': 'lacausers',
+ 'connection': 'Username-Password-Authentication',
+ 'username': username,
+ 'password': password,
+ 'sso': 'true',
+ })
+
+ login_state_url = urlh.geturl()
+
+ try:
+ login_page = self._download_webpage(
+ 'https://login.linuxacademy.com/usernamepassword/login', None,
+ 'Downloading login page', data=json.dumps(login_data).encode(),
+ headers={
+ 'Content-Type': 'application/json',
+ 'Origin': 'https://login.linuxacademy.com',
+ 'Referer': login_state_url,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read(), None)
+ message = error.get('description') or error['code']
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, message), expected=True)
+ raise
+
+ callback_page, urlh = self._download_webpage_handle(
+ 'https://login.linuxacademy.com/login/callback', None,
+ 'Downloading callback page',
+ data=urlencode_postdata(self._hidden_inputs(login_page)),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Origin': 'https://login.linuxacademy.com',
+ 'Referer': login_state_url,
+ })
+
+ access_token = self._search_regex(
+ r'access_token=([^=&]+)', urlh.geturl(),
+ 'access token')
+
+ self._download_webpage(
+ 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
+ % access_token, None, 'Downloading token validation page')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ chapter_id, lecture_id, course_id = mobj.group('chapter_id', 'lesson_id', 'course_id')
+ item_id = course_id if course_id else '%s-%s' % (chapter_id, lecture_id)
+
+ webpage = self._download_webpage(url, item_id)
+
+ # course path
+ if course_id:
+ entries = [
+ self.url_result(
+ urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
+ for lesson_url in orderedSet(re.findall(
+ r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
+ webpage))]
+ title = unescapeHTML(self._html_search_regex(
+ (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
+ r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
+ webpage, 'title', default=None, group='value'))
+ description = unescapeHTML(self._html_search_regex(
+ r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'description', default=None, group='value'))
+ return self.playlist_result(entries, course_id, title, description)
+
+ # single video path
+ info = self._extract_jwplayer_data(
+ webpage, item_id, require_title=False, m3u8_id='hls')
+ title = self._search_regex(
+ (r'>Lecture\s*:\s*(?P<value>[^<]+)',
+ r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'title', group='value')
+ info.update({
+ 'id': item_id,
+ 'title': title,
+ })
+ return info
diff --git a/youtube_dl/extractor/litv.py b/youtube_dlc/extractor/litv.py
index 337b1b15c..337b1b15c 100644
--- a/youtube_dl/extractor/litv.py
+++ b/youtube_dlc/extractor/litv.py
diff --git a/youtube_dl/extractor/livejournal.py b/youtube_dlc/extractor/livejournal.py
index 3a9f4553f..3a9f4553f 100644
--- a/youtube_dl/extractor/livejournal.py
+++ b/youtube_dlc/extractor/livejournal.py
diff --git a/youtube_dl/extractor/liveleak.py b/youtube_dlc/extractor/liveleak.py
index 4ac437c8b..4ac437c8b 100644
--- a/youtube_dl/extractor/liveleak.py
+++ b/youtube_dlc/extractor/liveleak.py
diff --git a/youtube_dl/extractor/livestream.py b/youtube_dlc/extractor/livestream.py
index e55b1a202..e55b1a202 100644
--- a/youtube_dl/extractor/livestream.py
+++ b/youtube_dlc/extractor/livestream.py
diff --git a/youtube_dlc/extractor/lnkgo.py b/youtube_dlc/extractor/lnkgo.py
new file mode 100644
index 000000000..3e71852aa
--- /dev/null
+++ b/youtube_dlc/extractor/lnkgo.py
@@ -0,0 +1,88 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ compat_str,
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class LnkGoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?lnk(?:go)?\.(?:alfa\.)?lt/(?:visi-video/[^/]+|video)/(?P<id>[A-Za-z0-9-]+)(?:/(?P<episode_id>\d+))?'
+ _TESTS = [{
+ 'url': 'http://www.lnkgo.lt/visi-video/aktualai-pratesimas/ziurek-putka-trys-klausimai',
+ 'info_dict': {
+ 'id': '10809',
+ 'ext': 'mp4',
+ 'title': "Put'ka: Trys Klausimai",
+ 'upload_date': '20161216',
+ 'description': 'Seniai matytas Put’ka užduoda tris klausimėlius. Pabandykime surasti atsakymus.',
+ 'age_limit': 18,
+ 'duration': 117,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1481904000,
+ },
+ 'params': {
+ 'skip_download': True, # HLS download
+ },
+ }, {
+ 'url': 'http://lnkgo.alfa.lt/visi-video/aktualai-pratesimas/ziurek-nerdas-taiso-kompiuteri-2',
+ 'info_dict': {
+ 'id': '10467',
+ 'ext': 'mp4',
+ 'title': 'Nėrdas: Kompiuterio Valymas',
+ 'upload_date': '20150113',
+ 'description': 'md5:7352d113a242a808676ff17e69db6a69',
+ 'age_limit': 18,
+ 'duration': 346,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1421164800,
+ },
+ 'params': {
+ 'skip_download': True, # HLS download
+ },
+ }, {
+ 'url': 'https://lnk.lt/video/neigalieji-tv-bokste/37413',
+ 'only_matching': True,
+ }]
+ _AGE_LIMITS = {
+ 'N-7': 7,
+ 'N-14': 14,
+ 'S': 18,
+ }
+ _M3U8_TEMPL = 'https://vod.lnk.lt/lnk_vod/lnk/lnk/%s:%s/playlist.m3u8%s'
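+ # Illustrative expansion: _M3U8_TEMPL % ('smil', '<videoUrl>', '?tokenParams')
+ # -> 'https://vod.lnk.lt/lnk_vod/lnk/lnk/smil:<videoUrl>/playlist.m3u8?tokenParams'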
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+
+ video_info = self._download_json(
+ 'https://lnk.lt/api/main/video-page/%s/%s/false' % (display_id, video_id or '0'),
+ display_id)['videoConfig']['videoInfo']
+
+ video_id = compat_str(video_info['id'])
+ title = video_info['title']
+ prefix = 'smil' if video_info.get('isQualityChangeAvailable') else 'mp4'
+ formats = self._extract_m3u8_formats(
+ self._M3U8_TEMPL % (prefix, video_info['videoUrl'], video_info.get('secureTokenParams') or ''),
+ video_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+
+ poster_image = video_info.get('posterImage')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': 'https://lnk.lt/all-images/' + poster_image if poster_image else None,
+ 'duration': int_or_none(video_info.get('duration')),
+ 'description': clean_html(video_info.get('htmlDescription')),
+ 'age_limit': self._AGE_LIMITS.get(video_info.get('pgRating'), 0),
+ 'timestamp': parse_iso8601(video_info.get('airDate')),
+ 'view_count': int_or_none(video_info.get('viewsCount')),
+ }
diff --git a/youtube_dl/extractor/localnews8.py b/youtube_dlc/extractor/localnews8.py
index aad396135..aad396135 100644
--- a/youtube_dl/extractor/localnews8.py
+++ b/youtube_dlc/extractor/localnews8.py
diff --git a/youtube_dl/extractor/lovehomeporn.py b/youtube_dlc/extractor/lovehomeporn.py
index 8f65a3c03..8f65a3c03 100644
--- a/youtube_dl/extractor/lovehomeporn.py
+++ b/youtube_dlc/extractor/lovehomeporn.py
diff --git a/youtube_dl/extractor/lrt.py b/youtube_dlc/extractor/lrt.py
index f5c997ef4..f5c997ef4 100644
--- a/youtube_dl/extractor/lrt.py
+++ b/youtube_dlc/extractor/lrt.py
diff --git a/youtube_dl/extractor/lynda.py b/youtube_dlc/extractor/lynda.py
index b3d8653d0..b3d8653d0 100644
--- a/youtube_dl/extractor/lynda.py
+++ b/youtube_dlc/extractor/lynda.py
diff --git a/youtube_dl/extractor/m6.py b/youtube_dlc/extractor/m6.py
index 9806875e8..9806875e8 100644
--- a/youtube_dl/extractor/m6.py
+++ b/youtube_dlc/extractor/m6.py
diff --git a/youtube_dlc/extractor/mailru.py b/youtube_dlc/extractor/mailru.py
new file mode 100644
index 000000000..65cc474db
--- /dev/null
+++ b/youtube_dlc/extractor/mailru.py
@@ -0,0 +1,329 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ remove_end,
+ try_get,
+)
+
+
+class MailRuIE(InfoExtractor):
+ IE_NAME = 'mailru'
+ IE_DESC = 'Видео@Mail.Ru'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m)\.)?my\.mail\.ru/+
+ (?:
+ video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|
+ (?:(?P<idv2prefix>(?:[^/]+/+){2})video/(?P<idv2suffix>[^/]+/\d+))\.html|
+ (?:video/embed|\+/video/meta)/(?P<metaid>\d+)
+ )
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76',
+ 'md5': 'dea205f03120046894db4ebb6159879a',
+ 'info_dict': {
+ 'id': '46301138_76',
+ 'ext': 'mp4',
+ 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро',
+ 'timestamp': 1393235077,
+ 'upload_date': '20140224',
+ 'uploader': 'sonypicturesrus',
+ 'uploader_id': 'sonypicturesrus@mail.ru',
+ 'duration': 184,
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ },
+ {
+ 'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html',
+ 'md5': '00a91a58c3402204dcced523777b475f',
+ 'info_dict': {
+ 'id': '46843144_1263',
+ 'ext': 'mp4',
+ 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion',
+ 'timestamp': 1397039888,
+ 'upload_date': '20140409',
+ 'uploader': 'hitech',
+ 'uploader_id': 'hitech@corp.mail.ru',
+ 'duration': 245,
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ },
+ {
+ # only available via metaUrl API
+ 'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html',
+ 'md5': '3b26d2491c6949d031a32b96bd97c096',
+ 'info_dict': {
+ 'id': '56664382_502',
+ 'ext': 'mp4',
+ 'title': ':8336',
+ 'timestamp': 1449094163,
+ 'upload_date': '20151202',
+ 'uploader': '720pizle@mail.ru',
+ 'uploader_id': '720pizle@mail.ru',
+ 'duration': 6001,
+ },
+ 'skip': 'Not accessible from Travis CI server',
+ },
+ {
+ 'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru/video/embed/7949340477499637815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru//list/sinyutin10/video/_myvideo/4.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ meta_id = mobj.group('metaid')
+
+ video_id = None
+ if meta_id:
+ meta_url = 'https://my.mail.ru/+/video/meta/%s' % meta_id
+ else:
+ video_id = mobj.group('idv1')
+ if not video_id:
+ video_id = mobj.group('idv2prefix') + mobj.group('idv2suffix')
+ webpage = self._download_webpage(url, video_id)
+ page_config = self._parse_json(self._search_regex(
+ r'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>',
+ webpage, 'page config', default='{}'), video_id, fatal=False)
+ if page_config:
+ meta_url = page_config.get('metaUrl') or page_config.get('video', {}).get('metaUrl')
+ else:
+ meta_url = None
+
+ video_data = None
+ if meta_url:
+ video_data = self._download_json(
+ meta_url, video_id or meta_id, 'Downloading video meta JSON',
+ fatal=not video_id)
+
+ # Fallback old approach
+ if not video_data:
+ video_data = self._download_json(
+ 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id,
+ video_id, 'Downloading video JSON')
+
+ headers = {}
+
+ video_key = self._get_cookies('https://my.mail.ru').get('video_key')
+ if video_key:
+ headers['Cookie'] = 'video_key=%s' % video_key.value
+
+ formats = []
+ for f in video_data['videos']:
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ format_id = f.get('key')
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None)) if format_id else None
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'height': height,
+ 'http_headers': headers,
+ })
+ self._sort_formats(formats)
+
+ meta_data = video_data['meta']
+ title = remove_end(meta_data['title'], '.mp4')
+
+ author = video_data.get('author') or {}
+ uploader = author.get('name')
+ uploader_id = author.get('id') or author.get('email')
+ view_count = int_or_none(video_data.get('viewsCount') or video_data.get('views_count'))
+
+ acc_id = meta_data.get('accId')
+ item_id = meta_data.get('itemId')
+ content_id = '%s_%s' % (acc_id, item_id) if acc_id and item_id else video_id
+
+ thumbnail = meta_data.get('poster')
+ duration = int_or_none(meta_data.get('duration'))
+ timestamp = int_or_none(meta_data.get('timestamp'))
+
+ return {
+ 'id': content_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ }
+
+
+class MailRuMusicSearchBaseIE(InfoExtractor):
+ def _search(self, query, url, audio_id, limit=100, offset=0):
+ search = self._download_json(
+ 'https://my.mail.ru/cgi-bin/my/ajax', audio_id,
+ 'Downloading songs JSON page %d' % (offset // limit + 1),
+ headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }, query={
+ 'xemail': '',
+ 'ajax_call': '1',
+ 'func_name': 'music.search',
+ 'mna': '',
+ 'mnb': '',
+ 'arg_query': query,
+ 'arg_extended': '1',
+ 'arg_search_params': json.dumps({
+ 'music': {
+ 'limit': limit,
+ 'offset': offset,
+ },
+ }),
+ 'arg_limit': limit,
+ 'arg_offset': offset,
+ })
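+ # The ajax endpoint responds with a JSON array; the dict element
+ # carries the payload (e.g. 'MusicData'), so the remaining wrapper
+ # elements are skipped.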
+ return next(e for e in search if isinstance(e, dict))
+
+ @staticmethod
+ def _extract_track(t, fatal=True):
+ audio_url = t['URL'] if fatal else t.get('URL')
+ if not audio_url:
+ return
+
+ audio_id = t['File'] if fatal else t.get('File')
+ if not audio_id:
+ return
+
+ thumbnail = t.get('AlbumCoverURL') or t.get('FiledAlbumCover')
+ uploader = t.get('OwnerName') or t.get('OwnerName_Text_HTML')
+ uploader_id = t.get('UploaderID')
+ duration = int_or_none(t.get('DurationInSeconds')) or parse_duration(
+ t.get('Duration') or t.get('DurationStr'))
+ view_count = int_or_none(t.get('PlayCount') or t.get('PlayCount_hr'))
+
+ track = t.get('Name') or t.get('Name_Text_HTML')
+ artist = t.get('Author') or t.get('Author_Text_HTML')
+
+ if track:
+ title = '%s - %s' % (artist, track) if artist else track
+ else:
+ title = audio_id
+
+ return {
+ 'extractor_key': MailRuMusicIE.ie_key(),
+ 'id': audio_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'vcodec': 'none',
+ 'abr': int_or_none(t.get('BitRate')),
+ 'track': track,
+ 'artist': artist,
+ 'album': t.get('Album'),
+ 'url': audio_url,
+ }
+
+
+class MailRuMusicIE(MailRuMusicSearchBaseIE):
+ IE_NAME = 'mailru:music'
+ IE_DESC = 'Музыка@Mail.Ru'
+ _VALID_URL = r'https?://my\.mail\.ru/+music/+songs/+[^/?#&]+-(?P<id>[\da-f]+)'
+ _TESTS = [{
+ 'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893',
+ 'md5': '0f8c22ef8c5d665b13ac709e63025610',
+ 'info_dict': {
+ 'id': '4e31f7125d0dfaef505d947642366893',
+ 'ext': 'mp3',
+ 'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ',
+ 'uploader': 'Игорь Мудрый',
+ 'uploader_id': '1459196328',
+ 'duration': 280,
+ 'view_count': int,
+ 'vcodec': 'none',
+ 'abr': 320,
+ 'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017',
+ 'artist': 'М8Л8ТХ',
+ },
+ }]
+
+ def _real_extract(self, url):
+ audio_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, audio_id)
+
+ title = self._og_search_title(webpage)
+ music_data = self._search(title, url, audio_id)['MusicData']
+ t = next(t for t in music_data if t.get('File') == audio_id)
+
+ info = self._extract_track(t)
+ info['title'] = title
+ return info
+
+
+class MailRuMusicSearchIE(MailRuMusicSearchBaseIE):
+ IE_NAME = 'mailru:music:search'
+ IE_DESC = 'Музыка@Mail.Ru'
+ _VALID_URL = r'https?://my\.mail\.ru/+music/+search/+(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://my.mail.ru/music/search/black%20shadow',
+ 'info_dict': {
+ 'id': 'black shadow',
+ },
+ 'playlist_mincount': 532,
+ }]
+
+ def _real_extract(self, url):
+ query = compat_urllib_parse_unquote(self._match_id(url))
+
+ entries = []
+
+ LIMIT = 100
+ offset = 0
+
+ for _ in itertools.count(1):
+ search = self._search(query, url, query, LIMIT, offset)
+
+ music_data = search.get('MusicData')
+ if not music_data or not isinstance(music_data, list):
+ break
+
+ for t in music_data:
+ track = self._extract_track(t, fatal=False)
+ if track:
+ entries.append(track)
+
+ total = try_get(
+ search, lambda x: x['Results']['music']['Total'], int)
+
+ if total is not None:
+ if offset > total:
+ break
+
+ offset += LIMIT
+
+ return self.playlist_result(entries, query)
diff --git a/youtube_dlc/extractor/malltv.py b/youtube_dlc/extractor/malltv.py
new file mode 100644
index 000000000..6f4fd927f
--- /dev/null
+++ b/youtube_dlc/extractor/malltv.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import merge_dicts
+
+
+class MallTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|sk)\.)?mall\.tv/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.mall.tv/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'md5': '1c4a37f080e1f3023103a7b43458e518',
+ 'info_dict': {
+ 'id': 't0zzt0',
+ 'display_id': '18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'ext': 'mp4',
+ 'title': '18 miliard pro neziskovky. Opravdu jsou sportovci nebo Člověk v tísni pijavice?',
+ 'description': 'md5:25fc0ec42a72ba602b602c683fa29deb',
+ 'duration': 216,
+ 'timestamp': 1538870400,
+ 'upload_date': '20181007',
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'https://www.mall.tv/kdo-to-plati/18-miliard-pro-neziskovky-opravdu-jsou-sportovci-nebo-clovek-v-tisni-pijavice',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://sk.mall.tv/gejmhaus/reklamacia-nehreje-vyrobnik-tepla-alebo-spekacka',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, display_id, headers=self.geo_verification_headers())
+
+ SOURCE_RE = r'(<source[^>]+\bsrc=(?:(["\'])(?:(?!\2).)+|[^\s]+)/(?P<id>[\da-z]+)/index)\b'
+ video_id = self._search_regex(
+ SOURCE_RE, webpage, 'video id', group='id')
+
+ media = self._parse_html5_media_entries(
+ url, re.sub(SOURCE_RE, r'\1.m3u8', webpage), video_id,
+ m3u8_id='hls', m3u8_entry_protocol='m3u8_native')[0]
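+ # The re.sub above rewrites the matched '<source ... /<id>/index'
+ # prefix to end in 'index.m3u8', so _parse_html5_media_entries sees
+ # an HLS manifest URL and expands it into formats.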
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ return merge_dicts(media, info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': self._og_search_title(webpage, default=None) or display_id,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ })
diff --git a/youtube_dlc/extractor/mangomolo.py b/youtube_dlc/extractor/mangomolo.py
new file mode 100644
index 000000000..acee370e9
--- /dev/null
+++ b/youtube_dlc/extractor/mangomolo.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote,
+)
+from ..utils import int_or_none
+
+
+class MangomoloBaseIE(InfoExtractor):
+ _BASE_REGEX = r'https?://(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
+
+ def _get_real_id(self, page_id):
+ return page_id
+
+ def _real_extract(self, url):
+ page_id = self._get_real_id(self._match_id(url))
+ webpage = self._download_webpage(
+ 'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id)
+ hidden_inputs = self._hidden_inputs(webpage)
+ m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
+
+ format_url = self._html_search_regex(
+ [
+ r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
+ r'<a[^>]+href="(rtsp://[^"]+)"'
+ ], webpage, 'format url')
+ formats = self._extract_wowza_formats(
+ format_url, page_id, m3u8_entry_protocol, ['smil'])
+ self._sort_formats(formats)
+
+ return {
+ 'id': page_id,
+ 'title': self._live_title(page_id) if self._IS_LIVE else page_id,
+ 'uploader_id': hidden_inputs.get('userid'),
+ 'duration': int_or_none(hidden_inputs.get('duration')),
+ 'is_live': self._IS_LIVE,
+ 'formats': formats,
+ }
+
+
+class MangomoloVideoIE(MangomoloBaseIE):
+ _TYPE = 'video'
+ IE_NAME = 'mangomolo:' + _TYPE
+ _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'video\?.*?\bid=(?P<id>\d+)'
+ _IS_LIVE = False
+
+
+class MangomoloLiveIE(MangomoloBaseIE):
+ _TYPE = 'live'
+ IE_NAME = 'mangomolo:' + _TYPE
+ _VALID_URL = MangomoloBaseIE._BASE_REGEX + r'(?:live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
+ _IS_LIVE = True
+
+ def _get_real_id(self, page_id):
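+ # Live channel ids arrive URL-quoted and base64-encoded; undo both
+ # layers to recover the numeric id (illustrative: 'MTA1' -> '105').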
+ return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode()
diff --git a/youtube_dl/extractor/manyvids.py b/youtube_dlc/extractor/manyvids.py
index e8d7163e4..e8d7163e4 100644
--- a/youtube_dl/extractor/manyvids.py
+++ b/youtube_dlc/extractor/manyvids.py
diff --git a/youtube_dl/extractor/markiza.py b/youtube_dlc/extractor/markiza.py
index def960a0c..def960a0c 100644
--- a/youtube_dl/extractor/markiza.py
+++ b/youtube_dlc/extractor/markiza.py
diff --git a/youtube_dl/extractor/massengeschmacktv.py b/youtube_dlc/extractor/massengeschmacktv.py
index cfcc6b224..cfcc6b224 100644
--- a/youtube_dl/extractor/massengeschmacktv.py
+++ b/youtube_dlc/extractor/massengeschmacktv.py
diff --git a/youtube_dl/extractor/matchtv.py b/youtube_dlc/extractor/matchtv.py
index bc9933a81..bc9933a81 100644
--- a/youtube_dl/extractor/matchtv.py
+++ b/youtube_dlc/extractor/matchtv.py
diff --git a/youtube_dl/extractor/mdr.py b/youtube_dlc/extractor/mdr.py
index 322e5b45a..322e5b45a 100644
--- a/youtube_dl/extractor/mdr.py
+++ b/youtube_dlc/extractor/mdr.py
diff --git a/youtube_dl/extractor/medialaan.py b/youtube_dlc/extractor/medialaan.py
index 50d5db802..50d5db802 100644
--- a/youtube_dl/extractor/medialaan.py
+++ b/youtube_dlc/extractor/medialaan.py
diff --git a/youtube_dlc/extractor/mediaset.py b/youtube_dlc/extractor/mediaset.py
new file mode 100644
index 000000000..933df1495
--- /dev/null
+++ b/youtube_dlc/extractor/mediaset.py
@@ -0,0 +1,179 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .theplatform import ThePlatformBaseIE
+from ..compat import (
+ compat_parse_qs,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ update_url_query,
+)
+
+
+class MediasetIE(ThePlatformBaseIE):
+ _TP_TLD = 'eu'
+ _VALID_URL = r'''(?x)
+ (?:
+ mediaset:|
+ https?://
+ (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/
+ (?:
+ (?:video|on-demand)/(?:[^/]+/)+[^/]+_|
+ player/index\.html\?.*?\bprogramGuid=
+ )
+ )(?P<id>[0-9A-Z]{16,})
+ '''
+ _TESTS = [{
+ # full episode
+ 'url': 'https://www.mediasetplay.mediaset.it/video/hellogoodbye/quarta-puntata_FAFU000000661824',
+ 'md5': '9b75534d42c44ecef7bf1ffeacb7f85d',
+ 'info_dict': {
+ 'id': 'FAFU000000661824',
+ 'ext': 'mp4',
+ 'title': 'Quarta puntata',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1414.26,
+ 'upload_date': '20161107',
+ 'series': 'Hello Goodbye',
+ 'timestamp': 1478532900,
+ 'uploader': 'Rete 4',
+ 'uploader_id': 'R4',
+ },
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/matrix/puntata-del-25-maggio_F309013801000501',
+ 'md5': '288532f0ad18307705b01e581304cd7b',
+ 'info_dict': {
+ 'id': 'F309013801000501',
+ 'ext': 'mp4',
+ 'title': 'Puntata del 25 maggio',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 6565.007,
+ 'upload_date': '20180526',
+ 'series': 'Matrix',
+ 'timestamp': 1527326245,
+ 'uploader': 'Canale 5',
+ 'uploader_id': 'C5',
+ },
+ }, {
+ # clip
+ 'url': 'https://www.mediasetplay.mediaset.it/video/gogglebox/un-grande-classico-della-commedia-sexy_FAFU000000661680',
+ 'only_matching': True,
+ }, {
+ # iframe simple
+ 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665924&id=665924',
+ 'only_matching': True,
+ }, {
+ # iframe twitter (from http://www.wittytv.it/se-prima-mi-fidavo-zero/)
+ 'url': 'https://static3.mediasetplay.mediaset.it/player/index.html?appKey=5ad3966b1de1c4000d5cec48&programGuid=FAFU000000665104&id=665104',
+ 'only_matching': True,
+ }, {
+ 'url': 'mediaset:FAFU000000665924',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/mediasethaacuoreilfuturo/palmieri-alicudi-lisola-dei-tre-bambini-felici--un-decreto-per-alicudi-e-tutte-le-microscuole_FD00000000102295',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/cherryseason/anticipazioni-degli-episodi-del-23-ottobre_F306837101005C02',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/tg5/ambiente-onda-umana-per-salvare-il-pianeta_F309453601079D01',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(ie, webpage):
+ def _qs(url):
+ return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
+ def _program_guid(qs):
+ return qs.get('programGuid', [None])[0]
+
+ entries = []
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
+ webpage):
+ embed_url = mobj.group('url')
+ embed_qs = _qs(embed_url)
+ program_guid = _program_guid(embed_qs)
+ if program_guid:
+ entries.append(embed_url)
+ continue
+ video_id = embed_qs.get('id', [None])[0]
+ if not video_id:
+ continue
+ urlh = ie._request_webpage(
+ embed_url, video_id, note='Following embed URL redirect')
+ embed_url = urlh.geturl()
+ program_guid = _program_guid(_qs(embed_url))
+ if program_guid:
+ entries.append(embed_url)
+ return entries
+
+ def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None, transform_rtmp_url=None):
+ for video in smil.findall(self._xpath_ns('.//video', namespace)):
+ video.attrib['src'] = re.sub(r'(https?://vod05)t(-mediaset-it\.akamaized\.net/.+?.mpd)\?.+', r'\1\2', video.attrib['src'])
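+ # For illustration, the substitution above maps a (hypothetical) URL like
+ # https://vod05t-mediaset-it.akamaized.net/x.mpd?hdnea=... to
+ # https://vod05-mediaset-it.akamaized.net/x.mpd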
+ return super(MediasetIE, self)._parse_smil_formats(smil, smil_url, video_id, namespace, f4m_params, transform_rtmp_url)
+
+ def _real_extract(self, url):
+ guid = self._match_id(url)
+ tp_path = 'PR1GhC/media/guid/2702976343/' + guid
+ info = self._extract_theplatform_metadata(tp_path, guid)
+
+ formats = []
+ subtitles = {}
+ first_e = None
+ for asset_type in ('SD', 'HD'):
+ # TODO: fixup ISM+none manifest URLs
+ for f in ('MPEG4', 'MPEG-DASH+none', 'M3U+none'):
+ try:
+ tp_formats, tp_subtitles = self._extract_theplatform_smil(
+ update_url_query('http://link.theplatform.%s/s/%s' % (self._TP_TLD, tp_path), {
+ 'mbr': 'true',
+ 'formats': f,
+ 'assetTypes': asset_type,
+ }), guid, 'Downloading %s %s SMIL data' % (f.split('+')[0], asset_type))
+ except ExtractorError as e:
+ if not first_e:
+ first_e = e
+ break
+ for tp_f in tp_formats:
+ tp_f['quality'] = 1 if asset_type == 'HD' else 0
+ formats.extend(tp_formats)
+ subtitles = self._merge_subtitles(subtitles, tp_subtitles)
+ if first_e and not formats:
+ raise first_e
+ self._sort_formats(formats)
+
+ fields = []
+ for templ, repls in (('tvSeason%sNumber', ('', 'Episode')), ('mediasetprogram$%s', ('brandTitle', 'numberOfViews', 'publishInfo'))):
+ fields.extend(templ % repl for repl in repls)
+ feed_data = self._download_json(
+ 'https://feed.entertainment.tv.theplatform.eu/f/PR1GhC/mediaset-prod-all-programs/guid/-/' + guid,
+ guid, fatal=False, query={'fields': ','.join(fields)})
+ if feed_data:
+ publish_info = feed_data.get('mediasetprogram$publishInfo') or {}
+ info.update({
+ 'episode_number': int_or_none(feed_data.get('tvSeasonEpisodeNumber')),
+ 'season_number': int_or_none(feed_data.get('tvSeasonNumber')),
+ 'series': feed_data.get('mediasetprogram$brandTitle'),
+ 'uploader': publish_info.get('description'),
+ 'uploader_id': publish_info.get('channel'),
+ 'view_count': int_or_none(feed_data.get('mediasetprogram$numberOfViews')),
+ })
+
+ info.update({
+ 'id': guid,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ })
+ return info
diff --git a/youtube_dlc/extractor/mediasite.py b/youtube_dlc/extractor/mediasite.py
new file mode 100644
index 000000000..d6eb15740
--- /dev/null
+++ b/youtube_dlc/extractor/mediasite.py
@@ -0,0 +1,366 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ float_or_none,
+ mimetype2ext,
+ str_or_none,
+ try_get,
+ unescapeHTML,
+ unsmuggle_url,
+ url_or_none,
+ urljoin,
+)
+
+
+_ID_RE = r'(?:[0-9a-f]{32,34}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,14})'
+
+
+class MediasiteIE(InfoExtractor):
+ _VALID_URL = r'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id>%s)(?P<query>\?[^#]+|)' % _ID_RE
+ _TESTS = [
+ {
+ 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d',
+ 'info_dict': {
+ 'id': '2db6c271681e4f199af3c60d1f82869b1d',
+ 'ext': 'mp4',
+ 'title': 'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles',
+ 'description': 'Sir Andrew Wiles: “Equations in arithmetic”\\n\\nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers\\u0027.',
+ 'timestamp': 1474268400.0,
+ 'upload_date': '20160919',
+ },
+ },
+ {
+ 'url': 'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb',
+ 'info_dict': {
+ 'id': '90bb363295d945d6b548c867d01181361d',
+ 'ext': 'mp4',
+ 'upload_date': '20150429',
+ 'title': '5) IT-forum 2015-Dag 1 - Dungbeetle - How and why Rain created a tiny bug tracker for Unity',
+ 'timestamp': 1430311380.0,
+ },
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d',
+ 'md5': '481fda1c11f67588c0d9d8fbdced4e39',
+ 'info_dict': {
+ 'id': '585a43626e544bdd97aeb71a0ec907a01d',
+ 'ext': 'mp4',
+ 'title': 'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.',
+ 'description': '',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*)?$',
+ 'duration': 7713.088,
+ 'timestamp': 1413309600,
+ 'upload_date': '20141014',
+ },
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4',
+ 'md5': 'ef1fdded95bdf19b12c5999949419c92',
+ 'info_dict': {
+ 'id': '86a9ea9f53e149079fbdb4202b521ed21d',
+ 'ext': 'wmv',
+ 'title': '64ste Vakantiecursus: Afvalwater',
+ 'description': 'md5:7fd774865cc69d972f542b157c328305',
+ 'thumbnail': r're:^https?://.*\.jpg(?:\?.*?)?$',
+ 'duration': 10853,
+ 'timestamp': 1326446400,
+ 'upload_date': '20120113',
+ },
+ },
+ {
+ 'url': 'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d',
+ 'md5': '9422edc9b9a60151727e4b6d8bef393d',
+ 'info_dict': {
+ 'id': '24aace4429fc450fb5b38cdbf424a66e1d',
+ 'ext': 'mp4',
+ 'title': 'Xyce Software Training - Section 1',
+ 'description': r're:(?s)SAND Number: SAND 2013-7800.{200,}',
+ 'upload_date': '20120409',
+ 'timestamp': 1333983600,
+ 'duration': 7794,
+ }
+ },
+ {
+ 'url': 'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d',
+ 'only_matching': True,
+ },
+ {
+ # dashed id
+ 'url': 'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d',
+ 'only_matching': True,
+ }
+ ]
+
+ # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])
+ _STREAM_TYPES = {
+ 0: 'video1', # the main video
+ 2: 'slide',
+ 3: 'presentation',
+ 4: 'video2', # screencast?
+ 5: 'video3',
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(
+ r'(?xi)<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:(?:https?:)?//[^/]+)?/Mediasite/Play/%s(?:\?.*?)?)\1' % _ID_RE,
+ webpage)]
+
+ def _real_extract(self, url):
+ url, data = unsmuggle_url(url, {})
+ mobj = re.match(self._VALID_URL, url)
+ resource_id = mobj.group('id')
+ query = mobj.group('query')
+
+ webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer?
+ redirect_url = urlh.geturl()
+
+ # XXX: might have also extracted UrlReferrer and QueryString from the html
+ service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(
+ r'<div[^>]+\bid=["\']ServicePath[^>]+>(.+?)</div>', webpage, resource_id,
+ default='/Mediasite/PlayerService/PlayerService.svc/json'))
+
+ player_options = self._download_json(
+ '%s/GetPlayerOptions' % service_path, resource_id,
+ headers={
+ 'Content-type': 'application/json; charset=utf-8',
+ 'X-Requested-With': 'XMLHttpRequest',
+ },
+ data=json.dumps({
+ 'getPlayerOptionsRequest': {
+ 'ResourceId': resource_id,
+ 'QueryString': query,
+ 'UrlReferrer': data.get('UrlReferrer', ''),
+ 'UseScreenReader': False,
+ }
+ }).encode('utf-8'))['d']
+
+ presentation = player_options['Presentation']
+ # a missing presentation carries an error status; check it before dereferencing
+ if presentation is None:
+ raise ExtractorError(
+ 'Mediasite says: %s' % player_options['PlayerPresentationStatusMessage'],
+ expected=True)
+
+ title = presentation['Title']
+
+ thumbnails = []
+ formats = []
+ for snum, Stream in enumerate(presentation['Streams']):
+ stream_type = Stream.get('StreamType')
+ if stream_type is None:
+ continue
+
+ video_urls = Stream.get('VideoUrls')
+ if not isinstance(video_urls, list):
+ video_urls = []
+
+ stream_id = self._STREAM_TYPES.get(
+ stream_type, 'type%u' % stream_type)
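+ # unknown enum values fall back to a generic id, e.g. StreamType 7 -> 'type7'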
+
+ stream_formats = []
+ for unum, VideoUrl in enumerate(video_urls):
+ video_url = url_or_none(VideoUrl.get('Location'))
+ if not video_url:
+ continue
+ # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS
+
+ media_type = VideoUrl.get('MediaType')
+ if media_type == 'SS':
+ stream_formats.extend(self._extract_ism_formats(
+ video_url, resource_id,
+ ism_id='%s-%u.%u' % (stream_id, snum, unum),
+ fatal=False))
+ elif media_type == 'Dash':
+ stream_formats.extend(self._extract_mpd_formats(
+ video_url, resource_id,
+ mpd_id='%s-%u.%u' % (stream_id, snum, unum),
+ fatal=False))
+ else:
+ stream_formats.append({
+ 'format_id': '%s-%u.%u' % (stream_id, snum, unum),
+ 'url': video_url,
+ 'ext': mimetype2ext(VideoUrl.get('MimeType')),
+ })
+
+ # TODO: if Stream['HasSlideContent']:
+ # synthesise an MJPEG video stream '%s-%u.slides' % (stream_type, snum)
+ # from Stream['Slides']
+ # this will require writing a custom downloader...
+
+ # disprefer 'secondary' streams
+ if stream_type != 0:
+ for fmt in stream_formats:
+ fmt['preference'] = -1
+
+ thumbnail_url = Stream.get('ThumbnailUrl')
+ if thumbnail_url:
+ thumbnails.append({
+ 'id': '%s-%u' % (stream_id, snum),
+ 'url': urljoin(redirect_url, thumbnail_url),
+ 'preference': -1 if stream_type != 0 else 0,
+ })
+ formats.extend(stream_formats)
+
+ self._sort_formats(formats)
+
+ # XXX: Presentation['Presenters']
+ # XXX: Presentation['Transcript']
+
+ return {
+ 'id': resource_id,
+ 'title': title,
+ 'description': presentation.get('Description'),
+ 'duration': float_or_none(presentation.get('Duration'), 1000),
+ 'timestamp': float_or_none(presentation.get('UnixTime'), 1000),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+
+class MediasiteCatalogIE(InfoExtractor):
+ _VALID_URL = r'''(?xi)
+ (?P<url>https?://[^/]+/Mediasite)
+ /Catalog/Full/
+ (?P<catalog_id>{0})
+ (?:
+ /(?P<current_folder_id>{0})
+ /(?P<root_dynamic_folder_id>{0})
+ )?
+ '''.format(_ID_RE)
+ _TESTS = [{
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48530d454381549f955d08c75e21',
+ 'info_dict': {
+ 'id': '631f9e48530d454381549f955d08c75e21',
+ 'title': 'WCET Summit: Adaptive Learning in Higher Ed: Improving Outcomes Dynamically',
+ },
+ 'playlist_count': 6,
+ 'expected_warnings': ['is not a supported codec'],
+ }, {
+ # with CurrentFolderId and RootDynamicFolderId
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'info_dict': {
+ 'id': '9518c4a6c5cf4993b21cbd53e828a92521',
+ 'title': 'IUSM Family and Friends Sessions',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://uipsyc.mediasite.com/mediasite/Catalog/Full/d5d79287c75243c58c50fef50174ec1b21',
+ 'only_matching': True,
+ }, {
+ # no AntiForgeryToken
+ 'url': 'https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',
+ 'only_matching': True,
+ }, {
+ # dashed id
+ 'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mediasite_url = mobj.group('url')
+ catalog_id = mobj.group('catalog_id')
+ current_folder_id = mobj.group('current_folder_id') or catalog_id
+ root_dynamic_folder_id = mobj.group('root_dynamic_folder_id')
+
+ webpage = self._download_webpage(url, catalog_id)
+
+ # AntiForgeryToken is optional (e.g. [1])
+ # 1. https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21
+ anti_forgery_token = self._search_regex(
+ r'AntiForgeryToken\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery token', default=None, group='value')
+ if anti_forgery_token:
+ anti_forgery_header = self._search_regex(
+ r'AntiForgeryHeaderName\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'anti forgery header name',
+ default='X-SOFO-AntiForgeryHeader', group='value')
+
+ data = {
+ 'IsViewPage': True,
+ 'IsNewFolder': True,
+ 'AuthTicket': None,
+ 'CatalogId': catalog_id,
+ 'CurrentFolderId': current_folder_id,
+ 'RootDynamicFolderId': root_dynamic_folder_id,
+ 'ItemsPerPage': 1000,
+ 'PageIndex': 0,
+ 'PermissionMask': 'Execute',
+ 'CatalogSearchType': 'SearchInFolder',
+ 'SortBy': 'Date',
+ 'SortDirection': 'Descending',
+ 'StartDate': None,
+ 'EndDate': None,
+ 'StatusFilterList': None,
+ 'PreviewKey': None,
+ 'Tags': [],
+ }
+
+ headers = {
+ 'Content-Type': 'application/json; charset=UTF-8',
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
+ if anti_forgery_token:
+ headers[anti_forgery_header] = anti_forgery_token
+
+ catalog = self._download_json(
+ '%s/Catalog/Data/GetPresentationsForFolder' % mediasite_url,
+ catalog_id, data=json.dumps(data).encode(), headers=headers)
+
+ entries = []
+ for video in catalog['PresentationDetailsList']:
+ if not isinstance(video, dict):
+ continue
+ video_id = str_or_none(video.get('Id'))
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ '%s/Play/%s' % (mediasite_url, video_id),
+ ie=MediasiteIE.ie_key(), video_id=video_id))
+
+ title = try_get(
+ catalog, lambda x: x['CurrentFolder']['Name'], compat_str)
+
+ return self.playlist_result(entries, catalog_id, title)
+
+
+class MediasiteNamedCatalogIE(InfoExtractor):
+ _VALID_URL = r'(?xi)(?P<url>https?://[^/]+/Mediasite)/Catalog/catalogs/(?P<catalog_name>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mediasite_url = mobj.group('url')
+ catalog_name = mobj.group('catalog_name')
+
+ webpage = self._download_webpage(url, catalog_name)
+
+ catalog_id = self._search_regex(
+ r'CatalogId\s*:\s*["\'](%s)' % _ID_RE, webpage, 'catalog id')
+
+ return self.url_result(
+ '%s/Catalog/Full/%s' % (mediasite_url, catalog_id),
+ ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id)
diff --git a/youtube_dl/extractor/medici.py b/youtube_dlc/extractor/medici.py
index cd910238e..cd910238e 100644
--- a/youtube_dl/extractor/medici.py
+++ b/youtube_dlc/extractor/medici.py
diff --git a/youtube_dl/extractor/megaphone.py b/youtube_dlc/extractor/megaphone.py
index 5bafa6cf4..5bafa6cf4 100644
--- a/youtube_dl/extractor/megaphone.py
+++ b/youtube_dlc/extractor/megaphone.py
diff --git a/youtube_dl/extractor/meipai.py b/youtube_dlc/extractor/meipai.py
index 2445b8b39..2445b8b39 100644
--- a/youtube_dl/extractor/meipai.py
+++ b/youtube_dlc/extractor/meipai.py
diff --git a/youtube_dl/extractor/melonvod.py b/youtube_dlc/extractor/melonvod.py
index bd8cf13ab..bd8cf13ab 100644
--- a/youtube_dl/extractor/melonvod.py
+++ b/youtube_dlc/extractor/melonvod.py
diff --git a/youtube_dl/extractor/meta.py b/youtube_dlc/extractor/meta.py
index cdb46e163..cdb46e163 100644
--- a/youtube_dl/extractor/meta.py
+++ b/youtube_dlc/extractor/meta.py
diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dlc/extractor/metacafe.py
index 9e92416d1..9e92416d1 100644
--- a/youtube_dl/extractor/metacafe.py
+++ b/youtube_dlc/extractor/metacafe.py
diff --git a/youtube_dl/extractor/metacritic.py b/youtube_dlc/extractor/metacritic.py
index 7d468d78b..7d468d78b 100644
--- a/youtube_dl/extractor/metacritic.py
+++ b/youtube_dlc/extractor/metacritic.py
diff --git a/youtube_dl/extractor/mgoon.py b/youtube_dlc/extractor/mgoon.py
index 7bb473900..7bb473900 100644
--- a/youtube_dl/extractor/mgoon.py
+++ b/youtube_dlc/extractor/mgoon.py
diff --git a/youtube_dl/extractor/mgtv.py b/youtube_dlc/extractor/mgtv.py
index 71fc3ec56..71fc3ec56 100644
--- a/youtube_dl/extractor/mgtv.py
+++ b/youtube_dlc/extractor/mgtv.py
diff --git a/youtube_dl/extractor/miaopai.py b/youtube_dlc/extractor/miaopai.py
index f9e35ac7f..f9e35ac7f 100644
--- a/youtube_dl/extractor/miaopai.py
+++ b/youtube_dlc/extractor/miaopai.py
diff --git a/youtube_dl/extractor/microsoftvirtualacademy.py b/youtube_dlc/extractor/microsoftvirtualacademy.py
index 8e0aee0e6..8e0aee0e6 100644
--- a/youtube_dl/extractor/microsoftvirtualacademy.py
+++ b/youtube_dlc/extractor/microsoftvirtualacademy.py
diff --git a/youtube_dl/extractor/ministrygrid.py b/youtube_dlc/extractor/ministrygrid.py
index 8ad9239c5..8ad9239c5 100644
--- a/youtube_dl/extractor/ministrygrid.py
+++ b/youtube_dlc/extractor/ministrygrid.py
diff --git a/youtube_dl/extractor/minoto.py b/youtube_dlc/extractor/minoto.py
index 636731195..636731195 100644
--- a/youtube_dl/extractor/minoto.py
+++ b/youtube_dlc/extractor/minoto.py
diff --git a/youtube_dl/extractor/miomio.py b/youtube_dlc/extractor/miomio.py
index 40f72d66f..40f72d66f 100644
--- a/youtube_dl/extractor/miomio.py
+++ b/youtube_dlc/extractor/miomio.py
diff --git a/youtube_dlc/extractor/mit.py b/youtube_dlc/extractor/mit.py
new file mode 100644
index 000000000..e1506a745
--- /dev/null
+++ b/youtube_dlc/extractor/mit.py
@@ -0,0 +1,132 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ get_element_by_id,
+)
+
+
+class TechTVMITIE(InfoExtractor):
+ IE_NAME = 'techtv.mit.edu'
+ _VALID_URL = r'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
+ 'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7',
+ 'info_dict': {
+ 'id': '25418',
+ 'ext': 'mp4',
+ 'title': 'MIT DNA and Protein Sets',
+ 'description': 'md5:46f5c69ce434f0a97e7c628cc142802d',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ raw_page = self._download_webpage(
+ 'http://techtv.mit.edu/videos/%s' % video_id, video_id)
+ clean_page = re.compile(r'<!--.*?-->', re.S).sub('', raw_page)
+
+ base_url = self._proto_relative_url(self._search_regex(
+ r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, 'base url'), 'http:')
+ formats_json = self._search_regex(
+ r'bitrates: (\[.+?\])', raw_page, 'video formats')
+ formats_mit = json.loads(formats_json)
+ formats = [
+ {
+ 'format_id': f['label'],
+ 'url': base_url + f['url'].partition(':')[2],
+ 'ext': f['url'].partition(':')[0],
+ 'format': f['label'],
+ 'width': f['width'],
+ 'vbr': f['bitrate'],
+ }
+ for f in formats_mit
+ ]
+
+ title = get_element_by_id('edit-title', clean_page)
+ description = clean_html(get_element_by_id('edit-description', clean_page))
+ thumbnail = self._search_regex(
+ r'playlist:.*?url: \'(.+?)\'',
+ raw_page, 'thumbnail', flags=re.DOTALL)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ }
+
+
+class OCWMITIE(InfoExtractor):
+ IE_NAME = 'ocw.mit.edu'
+ _VALID_URL = r'^https?://ocw\.mit\.edu/courses/(?P<topic>[a-z0-9\-]+)'
+ _BASE_URL = 'http://ocw.mit.edu/'
+
+ _TESTS = [
+ {
+ 'url': 'http://ocw.mit.edu/courses/electrical-engineering-and-computer-science/6-041-probabilistic-systems-analysis-and-applied-probability-fall-2010/video-lectures/lecture-7-multiple-variables-expectations-independence/',
+ 'info_dict': {
+ 'id': 'EObHWIEKGjA',
+ 'ext': 'webm',
+ 'title': 'Lecture 7: Multiple Discrete Random Variables: Expectations, Conditioning, Independence',
+ 'description': 'In this lecture, the professor discussed multiple random variables, expectations, and binomial distribution.',
+ 'upload_date': '20121109',
+ 'uploader_id': 'MIT',
+ 'uploader': 'MIT OpenCourseWare',
+ }
+ },
+ {
+ 'url': 'http://ocw.mit.edu/courses/mathematics/18-01sc-single-variable-calculus-fall-2010/1.-differentiation/part-a-definition-and-basic-rules/session-1-introduction-to-derivatives/',
+ 'info_dict': {
+ 'id': '7K1sB05pE0A',
+ 'ext': 'mp4',
+ 'title': 'Session 1: Introduction to Derivatives',
+ 'upload_date': '20090818',
+ 'uploader_id': 'MIT',
+ 'uploader': 'MIT OpenCourseWare',
+ 'description': 'This section contains lecture video excerpts, lecture notes, an interactive mathlet with supporting documents, and problem solving videos.',
+ }
+ }
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ topic = mobj.group('topic')
+
+ webpage = self._download_webpage(url, topic)
+ title = self._html_search_meta('WT.cg_s', webpage)
+ description = self._html_search_meta('Description', webpage)
+
+ # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file)
+ embed_chapter_media = re.search(r'ocw_embed_chapter_media\((.+?)\)', webpage)
+ if embed_chapter_media:
+ metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
+ metadata = re.split(r', ?', metadata)
+ yt = metadata[1]
+ else:
+ # search for call to ocw_embed_media(container_id, media_url, provider, page_url, image_url, captions_file)
+ embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
+ if embed_media:
+ metadata = re.sub(r'[\'"]', '', embed_media.group(1))
+ metadata = re.split(r', ?', metadata)
+ yt = metadata[1]
+ else:
+ raise ExtractorError('Unable to find embedded YouTube video.')
+ video_id = YoutubeIE.extract_id(yt)
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'url': yt,
+ 'ie_key': 'Youtube',
+ }
diff --git a/youtube_dlc/extractor/mitele.py b/youtube_dlc/extractor/mitele.py
new file mode 100644
index 000000000..ad9da9612
--- /dev/null
+++ b/youtube_dlc/extractor/mitele.py
@@ -0,0 +1,93 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ smuggle_url,
+)
+
+
+class MiTeleIE(InfoExtractor):
+ IE_DESC = 'mitele.es'
+ _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
+
+ _TESTS = [{
+ 'url': 'http://www.mitele.es/programas-tv/diario-de/57b0dfb9c715da65618b4afa/player',
+ 'info_dict': {
+ 'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg',
+ 'ext': 'mp4',
+ 'title': 'Diario de La redacción Programa 144',
+ 'description': 'md5:07c35a7b11abb05876a6a79185b58d27',
+ 'series': 'Diario de',
+ 'season': 'Season 14',
+ 'season_number': 14,
+ 'episode': 'Tor, la web invisible',
+ 'episode_number': 3,
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'duration': 2913,
+ 'age_limit': 16,
+ 'timestamp': 1471209401,
+ 'upload_date': '20160814',
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ # no explicit title
+ 'url': 'http://www.mitele.es/programas-tv/cuarto-milenio/57b0de3dc915da14058b4876/player',
+ 'info_dict': {
+ 'id': 'oyNG1iNTE6TAPP-JmCjbwfwJqqMMX3Vq',
+ 'ext': 'mp4',
+ 'title': 'Cuarto Milenio Temporada 6 Programa 226',
+ 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f',
+ 'series': 'Cuarto Milenio',
+ 'season': 'Season 6',
+ 'season_number': 6,
+ 'episode': 'Episode 24',
+ 'episode_number': 24,
+ 'thumbnail': r're:(?i)^https?://.*\.jpg$',
+ 'duration': 7313,
+ 'age_limit': 12,
+ 'timestamp': 1471209021,
+ 'upload_date': '20160814',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144-40_1006364575251/player/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ pre_player = self._parse_json(self._search_regex(
+ r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
+ webpage, 'Pre Player'), display_id)['prePlayer']
+ title = pre_player['title']
+ video = pre_player['video']
+ video_id = video['dataMediaId']
+ content = pre_player.get('content') or {}
+ info = content.get('info') or {}
+
+ return {
+ '_type': 'url_transparent',
+ # for some reason only HLS and DASH are supported
+ 'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
+ 'id': video_id,
+ 'title': title,
+ 'description': info.get('synopsis'),
+ 'series': content.get('title'),
+ 'season_number': int_or_none(info.get('season_number')),
+ 'episode': content.get('subtitle'),
+ 'episode_number': int_or_none(info.get('episode_number')),
+ 'duration': int_or_none(info.get('duration')),
+ 'thumbnail': video.get('dataPoster'),
+ 'age_limit': int_or_none(info.get('rating')),
+ 'timestamp': parse_iso8601(pre_player.get('publishedTime')),
+ }
diff --git a/youtube_dlc/extractor/mixcloud.py b/youtube_dlc/extractor/mixcloud.py
new file mode 100644
index 000000000..9759560f1
--- /dev/null
+++ b/youtube_dlc/extractor/mixcloud.py
@@ -0,0 +1,351 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_chr,
+ compat_ord,
+ compat_str,
+ compat_urllib_parse_unquote,
+ compat_zip
+)
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ strip_or_none,
+ try_get,
+)
+
+
+class MixcloudBaseIE(InfoExtractor):
+ def _call_api(self, object_type, object_fields, display_id, username, slug=None):
+ lookup_key = object_type + 'Lookup'
+ return self._download_json(
+ 'https://www.mixcloud.com/graphql', display_id, query={
+ 'query': '''{
+ %s(lookup: {username: "%s"%s}) {
+ %s
+ }
+}''' % (lookup_key, username, ', slug: "%s"' % slug if slug else '', object_fields)
+ })['data'][lookup_key]
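+ # Illustration: _call_api('cloudcast', 'name', tid, 'bob', 'mix') (made-up
+ # arguments) sends the query
+ # { cloudcastLookup(lookup: {username: "bob", slug: "mix"}) { name } }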
+
+
+class MixcloudIE(MixcloudBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|beta|m)\.)?mixcloud\.com/([^/]+)/(?!stream|uploads|favorites|listens|playlists)([^/]+)'
+ IE_NAME = 'mixcloud'
+
+ _TESTS = [{
+ 'url': 'http://www.mixcloud.com/dholbach/cryptkeeper/',
+ 'info_dict': {
+ 'id': 'dholbach_cryptkeeper',
+ 'ext': 'm4a',
+ 'title': 'Cryptkeeper',
+ 'description': 'After quite a long silence from myself, finally another Drum\'n\'Bass mix with my favourite current dance floor bangers.',
+ 'uploader': 'Daniel Holbach',
+ 'uploader_id': 'dholbach',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'view_count': int,
+ 'timestamp': 1321359578,
+ 'upload_date': '20111115',
+ },
+ }, {
+ 'url': 'http://www.mixcloud.com/gillespeterson/caribou-7-inch-vinyl-mix-chat/',
+ 'info_dict': {
+ 'id': 'gillespeterson_caribou-7-inch-vinyl-mix-chat',
+ 'ext': 'mp3',
+ 'title': 'Caribou 7 inch Vinyl Mix & Chat',
+ 'description': 'md5:2b8aec6adce69f9d41724647c65875e8',
+ 'uploader': 'Gilles Peterson Worldwide',
+ 'uploader_id': 'gillespeterson',
+ 'thumbnail': 're:https?://.*',
+ 'view_count': int,
+ 'timestamp': 1422987057,
+ 'upload_date': '20150203',
+ },
+ }, {
+ 'url': 'https://beta.mixcloud.com/RedLightRadio/nosedrip-15-red-light-radio-01-18-2016/',
+ 'only_matching': True,
+ }]
+ _DECRYPTION_KEY = 'IFYOUWANTTHEARTISTSTOGETPAIDDONOTDOWNLOADFROMMIXCLOUD'
+
+ @staticmethod
+ def _decrypt_xor_cipher(key, ciphertext):
+ """Encrypt/Decrypt XOR cipher. Both ways are possible because it's XOR."""
+ return ''.join([
+ compat_chr(compat_ord(ch) ^ compat_ord(k))
+ for ch, k in compat_zip(ciphertext, itertools.cycle(key))])
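+ # Since XOR is an involution, _decrypt_xor_cipher(k, _decrypt_xor_cipher(k, s))
+ # round-trips back to s, hence the shared encrypt/decrypt helper.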
+
+ def _real_extract(self, url):
+ username, slug = re.match(self._VALID_URL, url).groups()
+ username, slug = compat_urllib_parse_unquote(username), compat_urllib_parse_unquote(slug)
+ track_id = '%s_%s' % (username, slug)
+
+ cloudcast = self._call_api('cloudcast', '''audioLength
+ comments(first: 100) {
+ edges {
+ node {
+ comment
+ created
+ user {
+ displayName
+ username
+ }
+ }
+ }
+ totalCount
+ }
+ description
+ favorites {
+ totalCount
+ }
+ featuringArtistList
+ isExclusive
+ name
+ owner {
+ displayName
+ url
+ username
+ }
+ picture(width: 1024, height: 1024) {
+ url
+ }
+ plays
+ publishDate
+ reposts {
+ totalCount
+ }
+ streamInfo {
+ dashUrl
+ hlsUrl
+ url
+ }
+ tags {
+ tag {
+ name
+ }
+ }''', track_id, username, slug)
+
+ title = cloudcast['name']
+
+ stream_info = cloudcast['streamInfo']
+ formats = []
+
+ for url_key in ('url', 'hlsUrl', 'dashUrl'):
+ format_url = stream_info.get(url_key)
+ if not format_url:
+ continue
+ decrypted = self._decrypt_xor_cipher(
+ self._DECRYPTION_KEY, compat_b64decode(format_url))
+ if url_key == 'hlsUrl':
+ formats.extend(self._extract_m3u8_formats(
+ decrypted, track_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif url_key == 'dashUrl':
+ formats.extend(self._extract_mpd_formats(
+ decrypted, track_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'format_id': 'http',
+ 'url': decrypted,
+ 'downloader_options': {
+ # Mixcloud starts throttling at >~5M
+ 'http_chunk_size': 5242880,
+ },
+ })
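+ # http_chunk_size makes the HTTP downloader fetch the file in ~5 MiB
+ # ranged requests, staying under the observed throttling threshold.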
+
+ if not formats and cloudcast.get('isExclusive'):
+ self.raise_login_required()
+
+ self._sort_formats(formats)
+
+ comments = []
+ for edge in (try_get(cloudcast, lambda x: x['comments']['edges']) or []):
+ node = edge.get('node') or {}
+ text = strip_or_none(node.get('comment'))
+ if not text:
+ continue
+ user = node.get('user') or {}
+ comments.append({
+ 'author': user.get('displayName'),
+ 'author_id': user.get('username'),
+ 'text': text,
+ 'timestamp': parse_iso8601(node.get('created')),
+ })
+
+ tags = []
+ for t in cloudcast.get('tags') or []:
+ tag = try_get(t, lambda x: x['tag']['name'], compat_str)
+ if not tag:
+ continue
+ tags.append(tag)
+
+ get_count = lambda x: int_or_none(try_get(cloudcast, lambda y: y[x]['totalCount']))
+
+ owner = cloudcast.get('owner') or {}
+
+ return {
+ 'id': track_id,
+ 'title': title,
+ 'formats': formats,
+ 'description': cloudcast.get('description'),
+ 'thumbnail': try_get(cloudcast, lambda x: x['picture']['url'], compat_str),
+ 'uploader': owner.get('displayName'),
+ 'timestamp': parse_iso8601(cloudcast.get('publishDate')),
+ 'uploader_id': owner.get('username'),
+ 'uploader_url': owner.get('url'),
+ 'duration': int_or_none(cloudcast.get('audioLength')),
+ 'view_count': int_or_none(cloudcast.get('plays')),
+ 'like_count': get_count('favorites'),
+ 'repost_count': get_count('reposts'),
+ 'comment_count': get_count('comments'),
+ 'comments': comments,
+ 'tags': tags,
+ 'artist': ', '.join(cloudcast.get('featuringArtistList') or []) or None,
+ }
+
+
+class MixcloudPlaylistBaseIE(MixcloudBaseIE):
+ def _get_cloudcast(self, node):
+ return node
+
+ def _get_playlist_title(self, title, slug):
+ return title
+
+ def _real_extract(self, url):
+ username, slug = re.match(self._VALID_URL, url).groups()
+ username = compat_urllib_parse_unquote(username)
+ if not slug:
+ slug = 'uploads'
+ else:
+ slug = compat_urllib_parse_unquote(slug)
+ playlist_id = '%s_%s' % (username, slug)
+
+ is_playlist_type = self._ROOT_TYPE == 'playlist'
+ playlist_type = 'items' if is_playlist_type else slug
+ list_filter = ''
+
+ has_next_page = True
+ entries = []
+ while has_next_page:
+ playlist = self._call_api(
+ self._ROOT_TYPE, '''%s
+ %s
+ %s(first: 100%s) {
+ edges {
+ node {
+ %s
+ }
+ }
+ pageInfo {
+ endCursor
+ hasNextPage
+ }
+ }''' % (self._TITLE_KEY, self._DESCRIPTION_KEY, playlist_type, list_filter, self._NODE_TEMPLATE),
+ playlist_id, username, slug if is_playlist_type else None)
+
+ items = playlist.get(playlist_type) or {}
+ for edge in items.get('edges', []):
+ cloudcast = self._get_cloudcast(edge.get('node') or {})
+ cloudcast_url = cloudcast.get('url')
+ if not cloudcast_url:
+ continue
+ entries.append(self.url_result(
+ cloudcast_url, MixcloudIE.ie_key(), cloudcast.get('slug')))
+
+ page_info = items['pageInfo']
+ has_next_page = page_info['hasNextPage']
+ list_filter = ', after: "%s"' % page_info['endCursor']
+
+ return self.playlist_result(
+ entries, playlist_id,
+ self._get_playlist_title(playlist[self._TITLE_KEY], slug),
+ playlist.get(self._DESCRIPTION_KEY))
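+ # Paging sketch: each pass appends ', after: "<endCursor>"' to the lookup
+ # arguments and re-queries, walking the connection 100 edges at a time
+ # until pageInfo.hasNextPage goes false.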
+
+
+class MixcloudUserIE(MixcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<id>[^/]+)/(?P<type>uploads|favorites|listens|stream)?/?$'
+ IE_NAME = 'mixcloud:user'
+
+ _TESTS = [{
+ 'url': 'http://www.mixcloud.com/dholbach/',
+ 'info_dict': {
+ 'id': 'dholbach_uploads',
+ 'title': 'Daniel Holbach (uploads)',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+ },
+ 'playlist_mincount': 36,
+ }, {
+ 'url': 'http://www.mixcloud.com/dholbach/uploads/',
+ 'info_dict': {
+ 'id': 'dholbach_uploads',
+ 'title': 'Daniel Holbach (uploads)',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+ },
+ 'playlist_mincount': 36,
+ }, {
+ 'url': 'http://www.mixcloud.com/dholbach/favorites/',
+ 'info_dict': {
+ 'id': 'dholbach_favorites',
+ 'title': 'Daniel Holbach (favorites)',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+ },
+ # 'params': {
+ # 'playlist_items': '1-100',
+ # },
+ 'playlist_mincount': 396,
+ }, {
+ 'url': 'http://www.mixcloud.com/dholbach/listens/',
+ 'info_dict': {
+ 'id': 'dholbach_listens',
+ 'title': 'Daniel Holbach (listens)',
+ 'description': 'md5:b60d776f0bab534c5dabe0a34e47a789',
+ },
+ # 'params': {
+ # 'playlist_items': '1-100',
+ # },
+ 'playlist_mincount': 1623,
+ 'skip': 'Large list',
+ }, {
+ 'url': 'https://www.mixcloud.com/FirstEar/stream/',
+ 'info_dict': {
+ 'id': 'FirstEar_stream',
+ 'title': 'First Ear (stream)',
+ 'description': 'Curators of good music\r\n\r\nfirstearmusic.com',
+ },
+ 'playlist_mincount': 271,
+ }]
+
+ _TITLE_KEY = 'displayName'
+ _DESCRIPTION_KEY = 'biog'
+ _ROOT_TYPE = 'user'
+ _NODE_TEMPLATE = '''slug
+ url'''
+
+ def _get_playlist_title(self, title, slug):
+ return '%s (%s)' % (title, slug)
+
+
+class MixcloudPlaylistIE(MixcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?mixcloud\.com/(?P<user>[^/]+)/playlists/(?P<playlist>[^/]+)/?$'
+ IE_NAME = 'mixcloud:playlist'
+
+ _TESTS = [{
+ 'url': 'https://www.mixcloud.com/maxvibes/playlists/jazzcat-on-ness-radio/',
+ 'info_dict': {
+ 'id': 'maxvibes_jazzcat-on-ness-radio',
+ 'title': 'Ness Radio sessions',
+ },
+ 'playlist_mincount': 59,
+ }]
+ _TITLE_KEY = 'name'
+ _DESCRIPTION_KEY = 'description'
+ _ROOT_TYPE = 'playlist'
+ _NODE_TEMPLATE = '''cloudcast {
+ slug
+ url
+ }'''
+
+ def _get_cloudcast(self, node):
+ return node.get('cloudcast') or {}
diff --git a/youtube_dl/extractor/mlb.py b/youtube_dlc/extractor/mlb.py
index b907f6b49..b907f6b49 100644
--- a/youtube_dl/extractor/mlb.py
+++ b/youtube_dlc/extractor/mlb.py
diff --git a/youtube_dl/extractor/mnet.py b/youtube_dlc/extractor/mnet.py
index 0e26ca1b3..0e26ca1b3 100644
--- a/youtube_dl/extractor/mnet.py
+++ b/youtube_dlc/extractor/mnet.py
diff --git a/youtube_dl/extractor/moevideo.py b/youtube_dlc/extractor/moevideo.py
index eb9b4ce7c..eb9b4ce7c 100644
--- a/youtube_dl/extractor/moevideo.py
+++ b/youtube_dlc/extractor/moevideo.py
diff --git a/youtube_dlc/extractor/mofosex.py b/youtube_dlc/extractor/mofosex.py
new file mode 100644
index 000000000..5234cac02
--- /dev/null
+++ b/youtube_dlc/extractor/mofosex.py
@@ -0,0 +1,79 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ str_to_int,
+ unified_strdate,
+)
+from .keezmovies import KeezMoviesIE
+
+
+class MofosexIE(KeezMoviesIE):
+ _VALID_URL = r'https?://(?:www\.)?mofosex\.com/videos/(?P<id>\d+)/(?P<display_id>[^/?#&.]+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.mofosex.com/videos/318131/amateur-teen-playing-and-masturbating-318131.html',
+ 'md5': '558fcdafbb63a87c019218d6e49daf8a',
+ 'info_dict': {
+ 'id': '318131',
+ 'display_id': 'amateur-teen-playing-and-masturbating-318131',
+ 'ext': 'mp4',
+ 'title': 'amateur teen playing and masturbating',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20121114',
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'age_limit': 18,
+ }
+ }, {
+ # This video is no longer available
+ 'url': 'http://www.mofosex.com/videos/5018/japanese-teen-music-video.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ webpage, info = self._extract_info(url)
+
+ view_count = str_to_int(self._search_regex(
+ r'VIEWS:</span>\s*([\d,.]+)', webpage, 'view count', fatal=False))
+ like_count = int_or_none(self._search_regex(
+ r'id=["\']amountLikes["\'][^>]*>(\d+)', webpage,
+ 'like count', fatal=False))
+ dislike_count = int_or_none(self._search_regex(
+ r'id=["\']amountDislikes["\'][^>]*>(\d+)', webpage,
+ 'dislike count', fatal=False))
+ upload_date = unified_strdate(self._html_search_regex(
+ r'Added:</span>([^<]+)', webpage, 'upload date', fatal=False))
+
+ info.update({
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'upload_date': upload_date,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ })
+
+ return info
+
+
+class MofosexEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.mofosex.com/embed/?videoid=318131&referrer=KM',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?mofosex\.com/embed/?\?.*?\bvideoid=\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ return self.url_result(
+ 'http://www.mofosex.com/videos/{0}/{0}.html'.format(video_id),
+ ie=MofosexIE.ie_key(), video_id=video_id)
diff --git a/youtube_dl/extractor/mojvideo.py b/youtube_dlc/extractor/mojvideo.py
index 165e658c9..165e658c9 100644
--- a/youtube_dl/extractor/mojvideo.py
+++ b/youtube_dlc/extractor/mojvideo.py
diff --git a/youtube_dl/extractor/morningstar.py b/youtube_dlc/extractor/morningstar.py
index 0093bcd6c..0093bcd6c 100644
--- a/youtube_dl/extractor/morningstar.py
+++ b/youtube_dlc/extractor/morningstar.py
diff --git a/youtube_dlc/extractor/motherless.py b/youtube_dlc/extractor/motherless.py
new file mode 100644
index 000000000..b1615b4d8
--- /dev/null
+++ b/youtube_dlc/extractor/motherless.py
@@ -0,0 +1,207 @@
+from __future__ import unicode_literals
+
+import datetime
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ ExtractorError,
+ InAdvancePagedList,
+ orderedSet,
+ str_to_int,
+ unified_strdate,
+)
+
+
+class MotherlessIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?motherless\.com/(?:g/[a-z0-9_]+/)?(?P<id>[A-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://motherless.com/AC3FFE1',
+ 'md5': '310f62e325a9fafe64f68c0bccb6e75f',
+ 'info_dict': {
+ 'id': 'AC3FFE1',
+ 'ext': 'mp4',
+ 'title': 'Fucked in the ass while playing PS3',
+ 'categories': ['Gaming', 'anal', 'reluctant', 'rough', 'Wife'],
+ 'upload_date': '20100913',
+ 'uploader_id': 'famouslyfuckedup',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://motherless.com/532291B',
+ 'md5': 'bc59a6b47d1f958e61fbd38a4d31b131',
+ 'info_dict': {
+ 'id': '532291B',
+ 'ext': 'mp4',
+ 'title': 'Amazing girl playing the omegle game, PERFECT!',
+ 'categories': ['Amateur', 'webcam', 'omegle', 'pink', 'young', 'masturbate', 'teen',
+ 'game', 'hairy'],
+ 'upload_date': '20140622',
+ 'uploader_id': 'Sulivana7x',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ },
+ 'skip': '404',
+ }, {
+ 'url': 'http://motherless.com/g/cosplay/633979F',
+ 'md5': '0b2a43f447a49c3e649c93ad1fafa4a0',
+ 'info_dict': {
+ 'id': '633979F',
+ 'ext': 'mp4',
+ 'title': 'Turtlette',
+ 'categories': ['superheroine heroine superher'],
+ 'upload_date': '20140827',
+ 'uploader_id': 'shade0230',
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'age_limit': 18,
+ }
+ }, {
+ # no keywords
+ 'url': 'http://motherless.com/8B4BBC1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if any(p in webpage for p in (
+ '<title>404 - MOTHERLESS.COM<',
+ ">The page you're looking for cannot be found.<")):
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ if '>The content you are trying to view is for friends only.' in webpage:
+ raise ExtractorError('Video %s is for friends only' % video_id, expected=True)
+
+ title = self._html_search_regex(
+ (r'(?s)<div[^>]+\bclass=["\']media-meta-title[^>]+>(.+?)</div>',
+ r'id="view-upload-title">\s+([^<]+)<'), webpage, 'title')
+ video_url = (self._html_search_regex(
+ (r'setup\(\{\s*["\']file["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+ r'fileurl\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1'),
+ webpage, 'video URL', default=None, group='url')
+ or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
+ age_limit = self._rta_search(webpage)
+ view_count = str_to_int(self._html_search_regex(
+ (r'>(\d+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
+ webpage, 'view count', fatal=False))
+ like_count = str_to_int(self._html_search_regex(
+ (r'>(\d+)\s+Favorites<', r'<strong>Favorited</strong>\s+([^<]+)<'),
+ webpage, 'like count', fatal=False))
+
+ upload_date = self._html_search_regex(
+ (r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
+ r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
+ if 'Ago' in upload_date:
+ days = int(re.search(r'([0-9]+)', upload_date).group(1))
+ upload_date = (datetime.datetime.now() - datetime.timedelta(days=days)).strftime('%Y%m%d')
+ else:
+ upload_date = unified_strdate(upload_date)
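+ # Sketch: a relative stamp such as '7d Ago' becomes today minus seven
+ # days (YYYYMMDD); absolute stamps go through unified_strdate, e.g.
+ # '13 Sep 2010' -> '20100913'.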
+
+ comment_count = webpage.count('class="media-comment-contents"')
+ uploader_id = self._html_search_regex(
+ r'"thumb-member-username">\s+<a href="/m/([^"]+)"',
+ webpage, 'uploader_id')
+
+ categories = self._html_search_meta('keywords', webpage, default=None)
+ if categories:
+ categories = [cat.strip() for cat in categories.split(',')]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'upload_date': upload_date,
+ 'uploader_id': uploader_id,
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ 'categories': categories,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
+ 'age_limit': age_limit,
+ 'url': video_url,
+ }
+
+
+class MotherlessGroupIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?motherless\.com/gv?/(?P<id>[a-z0-9_]+)'
+ _TESTS = [{
+ 'url': 'http://motherless.com/g/movie_scenes',
+ 'info_dict': {
+ 'id': 'movie_scenes',
+ 'title': 'Movie Scenes',
+ 'description': 'Hot and sexy scenes from "regular" movies... '
+ 'Beautiful actresses fully nude... A looot of '
+ 'skin! :)Enjoy!',
+ },
+ 'playlist_mincount': 662,
+ }, {
+ 'url': 'http://motherless.com/gv/sex_must_be_funny',
+ 'info_dict': {
+ 'id': 'sex_must_be_funny',
+ 'title': 'Sex must be funny',
+ 'description': 'Sex can be funny. Wide smiles,laugh, games, fun of '
+ 'any kind!'
+ },
+ 'playlist_mincount': 9,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if MotherlessIE.suitable(url)
+ else super(MotherlessGroupIE, cls).suitable(url))
+
+ def _extract_entries(self, webpage, base):
+ entries = []
+ for mobj in re.finditer(
+ r'href="(?P<href>/[^"]+)"[^>]*>(?:\s*<img[^>]+alt="[^-]+-\s(?P<title>[^"]+)")?',
+ webpage):
+ video_url = compat_urlparse.urljoin(base, mobj.group('href'))
+ if not MotherlessIE.suitable(video_url):
+ continue
+ video_id = MotherlessIE._match_id(video_url)
+ title = mobj.group('title')
+ entries.append(self.url_result(
+ video_url, ie=MotherlessIE.ie_key(), video_id=video_id,
+ video_title=title))
+ # Alternative fallback
+ if not entries:
+ entries = [
+ self.url_result(
+ compat_urlparse.urljoin(base, '/' + entry_id),
+ ie=MotherlessIE.ie_key(), video_id=entry_id)
+ for entry_id in orderedSet(re.findall(
+ r'data-codename=["\']([A-Z0-9]+)', webpage))]
+ return entries
+
+ def _real_extract(self, url):
+ group_id = self._match_id(url)
+ page_url = compat_urlparse.urljoin(url, '/gv/%s' % group_id)
+ webpage = self._download_webpage(page_url, group_id)
+ title = self._search_regex(
+ r'<title>([\w\s]+\w)\s+-', webpage, 'title', fatal=False)
+ description = self._html_search_meta(
+ 'description', webpage, fatal=False)
+ page_count = self._int(self._search_regex(
+ r'(\d+)</(?:a|span)><(?:a|span)[^>]+>\s*NEXT',
+ webpage, 'page_count'), 'page_count')
+ PAGE_SIZE = 80
+
+ def _get_page(idx):
+ webpage = self._download_webpage(
+ page_url, group_id, query={'page': idx + 1},
+ note='Downloading page %d/%d' % (idx + 1, page_count)
+ )
+ for entry in self._extract_entries(webpage, url):
+ yield entry
+
+ playlist = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)
+
+ return {
+ '_type': 'playlist',
+ 'id': group_id,
+ 'title': title,
+ 'description': description,
+ 'entries': playlist
+ }
diff --git a/youtube_dl/extractor/motorsport.py b/youtube_dlc/extractor/motorsport.py
index c9d1ab64d..c9d1ab64d 100644
--- a/youtube_dl/extractor/motorsport.py
+++ b/youtube_dlc/extractor/motorsport.py
diff --git a/youtube_dl/extractor/movieclips.py b/youtube_dlc/extractor/movieclips.py
index 5453da1ac..5453da1ac 100644
--- a/youtube_dl/extractor/movieclips.py
+++ b/youtube_dlc/extractor/movieclips.py
diff --git a/youtube_dl/extractor/moviezine.py b/youtube_dlc/extractor/moviezine.py
index 85cc6e22f..85cc6e22f 100644
--- a/youtube_dl/extractor/moviezine.py
+++ b/youtube_dlc/extractor/moviezine.py
diff --git a/youtube_dl/extractor/movingimage.py b/youtube_dlc/extractor/movingimage.py
index 4f62d628a..4f62d628a 100644
--- a/youtube_dl/extractor/movingimage.py
+++ b/youtube_dlc/extractor/movingimage.py
diff --git a/youtube_dlc/extractor/msn.py b/youtube_dlc/extractor/msn.py
new file mode 100644
index 000000000..e59b0b7b0
--- /dev/null
+++ b/youtube_dlc/extractor/msn.py
@@ -0,0 +1,171 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ unescapeHTML,
+)
+
+
+class MSNIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|preview)\.)?msn\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/[a-z]{2}-(?P<id>[\da-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'https://www.msn.com/en-in/money/video/7-ways-to-get-rid-of-chest-congestion/vi-BBPxU6d',
+ 'md5': '087548191d273c5c55d05028f8d2cbcd',
+ 'info_dict': {
+ 'id': 'BBPxU6d',
+ 'display_id': '7-ways-to-get-rid-of-chest-congestion',
+ 'ext': 'mp4',
+ 'title': 'Seven ways to get rid of chest congestion',
+ 'description': '7 Ways to Get Rid of Chest Congestion',
+ 'duration': 88,
+ 'uploader': 'Health',
+ 'uploader_id': 'BBPrMqa',
+ },
+ }, {
+ # Article, multiple Dailymotion Embeds
+ 'url': 'https://www.msn.com/en-in/money/sports/hottest-football-wags-greatest-footballers-turned-managers-and-more/ar-BBpc7Nl',
+ 'info_dict': {
+ 'id': 'BBpc7Nl',
+ },
+ 'playlist_mincount': 4,
+ }, {
+ 'url': 'http://www.msn.com/en-ae/news/offbeat/meet-the-nine-year-old-self-made-millionaire/ar-BBt6ZKf',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.msn.com/en-ae/video/watch/obama-a-lot-of-people-will-be-disappointed/vi-AAhxUMH',
+ 'only_matching': True,
+ }, {
+ # geo restricted
+ 'url': 'http://www.msn.com/en-ae/foodanddrink/joinourtable/the-first-fart-makes-you-laugh-the-last-fart-makes-you-cry/vp-AAhzIBU',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.msn.com/en-ae/entertainment/bollywood/watch-how-salman-khan-reacted-when-asked-if-he-would-apologize-for-his-‘raped-woman’-comment/vi-AAhvzW6',
+ 'only_matching': True,
+ }, {
+ # Vidible(AOL) Embed
+ 'url': 'https://www.msn.com/en-us/money/other/jupiter-is-about-to-come-so-close-you-can-see-its-moons-with-binoculars/vi-AACqsHR',
+ 'only_matching': True,
+ }, {
+ # Dailymotion Embed
+ 'url': 'https://www.msn.com/es-ve/entretenimiento/watch/winston-salem-paire-refait-des-siennes-en-perdant-sa-raquette-au-service/vp-AAG704L',
+ 'only_matching': True,
+ }, {
+ # YouTube Embed
+ 'url': 'https://www.msn.com/en-in/money/news/meet-vikram-%E2%80%94-chandrayaan-2s-lander/vi-AAGUr0v',
+ 'only_matching': True,
+ }, {
+ # NBCSports Embed
+ 'url': 'https://www.msn.com/en-us/money/football_nfl/week-13-preview-redskins-vs-panthers/vi-BBXsCDb',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id, page_id = re.match(self._VALID_URL, url).groups()
+
+ webpage = self._download_webpage(url, display_id)
+
+ entries = []
+ for _, metadata in re.findall(r'data-metadata\s*=\s*(["\'])(?P<data>.+?)\1', webpage):
+ video = self._parse_json(unescapeHTML(metadata), display_id)
+
+ provider_id = video.get('providerId')
+ player_name = video.get('playerName')
+ if player_name and provider_id:
+ entry = None
+ if player_name == 'AOL':
+ if provider_id.startswith('http'):
+ provider_id = self._search_regex(
+ r'https?://delivery\.vidible\.tv/video/redirect/([0-9a-f]{24})',
+ provider_id, 'vidible id')
+ entry = self.url_result(
+ 'aol-video:' + provider_id, 'Aol', provider_id)
+ elif player_name == 'Dailymotion':
+ entry = self.url_result(
+ 'https://www.dailymotion.com/video/' + provider_id,
+ 'Dailymotion', provider_id)
+ elif player_name == 'YouTube':
+ entry = self.url_result(
+ provider_id, 'Youtube', provider_id)
+ elif player_name == 'NBCSports':
+ entry = self.url_result(
+ 'http://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/' + provider_id,
+ 'NBCSportsVPlayer', provider_id)
+ if entry:
+ entries.append(entry)
+ continue
+
+ video_id = video['uuid']
+ title = video['title']
+
+ formats = []
+ for file_ in video.get('videoFiles', []):
+ format_url = file_.get('url')
+ if not format_url:
+ continue
+ if 'format=m3u8-aapl' in format_url:
+ # m3u8_native should not be used here until
+ # https://github.com/ytdl-org/youtube-dl/issues/9913 is fixed
+ formats.extend(self._extract_m3u8_formats(
+ format_url, display_id, 'mp4',
+ m3u8_id='hls', fatal=False))
+ elif 'format=mpd-time-csf' in format_url:
+ formats.extend(self._extract_mpd_formats(
+ format_url, display_id, 'dash', fatal=False))
+ elif '.ism' in format_url:
+ if format_url.endswith('.ism'):
+ format_url += '/manifest'
+ formats.extend(self._extract_ism_formats(
+ format_url, display_id, 'mss', fatal=False))
+ else:
+ format_id = file_.get('formatCode')
+ formats.append({
+ 'url': format_url,
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'width': int_or_none(file_.get('width')),
+ 'height': int_or_none(file_.get('height')),
+ 'vbr': int_or_none(self._search_regex(r'_(\d+)\.mp4', format_url, 'vbr', default=None)),
+ 'preference': 1 if format_id == '1001' else None,
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for file_ in video.get('files', []):
+ format_url = file_.get('url')
+ format_code = file_.get('formatCode')
+ if not format_url or not format_code:
+ continue
+ if compat_str(format_code) == '3100':
+ subtitles.setdefault(file_.get('culture', 'en'), []).append({
+ 'ext': determine_ext(format_url, 'ttml'),
+ 'url': format_url,
+ })
+
+ entries.append({
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'thumbnail': video.get('headlineImage', {}).get('url'),
+ 'duration': int_or_none(video.get('durationSecs')),
+ 'uploader': video.get('sourceFriendly'),
+ 'uploader_id': video.get('providerId'),
+ 'creator': video.get('creator'),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ })
+
+ if not entries:
+ error = unescapeHTML(self._search_regex(
+ r'data-error=(["\'])(?P<error>.+?)\1',
+ webpage, 'error', group='error'))
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)
+
+ return self.playlist_result(entries, page_id)
diff --git a/youtube_dlc/extractor/mtv.py b/youtube_dlc/extractor/mtv.py
new file mode 100644
index 000000000..fedd5f46b
--- /dev/null
+++ b/youtube_dlc/extractor/mtv.py
@@ -0,0 +1,474 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_xpath,
+)
+from ..utils import (
+ ExtractorError,
+ find_xpath_attr,
+ fix_xml_ampersands,
+ float_or_none,
+ HEADRequest,
+ RegexNotFoundError,
+ sanitized_Request,
+ strip_or_none,
+ timeconvert,
+ try_get,
+ unescapeHTML,
+ update_url_query,
+ url_basename,
+ xpath_text,
+)
+
+
+def _media_xml_tag(tag):
+ return '{http://search.yahoo.com/mrss/}%s' % tag
+
+
+class MTVServicesInfoExtractor(InfoExtractor):
+ _MOBILE_TEMPLATE = None
+ _LANG = None
+
+ @staticmethod
+ def _id_from_uri(uri):
+ return uri.split(':')[-1]
+
+ @staticmethod
+ def _remove_template_parameter(url):
+ # Remove the templates, like &device={device}
+ return re.sub(r'&[^=]*?={.*?}(?=(&|$))', '', url)
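+ # e.g. 'http://feed?uri=x&device={device}&ep=1' -> 'http://feed?uri=x&ep=1'
+ # (illustrative; the lookahead preserves the trailing '&' or end of string)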
+
+ def _get_feed_url(self, uri):
+ return self._FEED_URL
+
+ def _get_thumbnail_url(self, uri, itemdoc):
+ search_path = '%s/%s' % (_media_xml_tag('group'), _media_xml_tag('thumbnail'))
+ thumb_node = itemdoc.find(search_path)
+ if thumb_node is None:
+ return None
+ return thumb_node.get('url') or thumb_node.text or None
+
+ def _extract_mobile_video_formats(self, mtvn_id):
+ webpage_url = self._MOBILE_TEMPLATE % mtvn_id
+ req = sanitized_Request(webpage_url)
+ # Otherwise we get a webpage that would execute some javascript
+ req.add_header('User-Agent', 'curl/7')
+ webpage = self._download_webpage(req, mtvn_id,
+ 'Downloading mobile page')
+ metrics_url = unescapeHTML(self._search_regex(r'<a href="(http://metrics.+?)"', webpage, 'url'))
+ req = HEADRequest(metrics_url)
+ response = self._request_webpage(req, mtvn_id, 'Resolving url')
+ url = response.geturl()
+ # Transform the url to get the best quality:
+ url = re.sub(r'.+pxE=mp4', 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=0+_pxK=18639+_pxE=mp4', url, 1)
+ return [{'url': url, 'ext': 'mp4'}]
+
+ def _extract_video_formats(self, mdoc, mtvn_id, video_id):
+ if re.match(r'.*/(error_country_block\.swf|geoblock\.mp4|copyright_error\.flv(?:\?geo\b.+?)?)$', mdoc.find('.//src').text) is not None:
+ if mtvn_id is not None and self._MOBILE_TEMPLATE is not None:
+ self.to_screen('The normal version is not available from your '
+ 'country, trying with the mobile version')
+ return self._extract_mobile_video_formats(mtvn_id)
+ raise ExtractorError('This video is not available from your country.',
+ expected=True)
+
+ formats = []
+ for rendition in mdoc.findall('.//rendition'):
+ if rendition.get('method') == 'hls':
+ hls_url = rendition.find('./src').text
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ # fms
+ try:
+ _, _, ext = rendition.attrib['type'].partition('/')
+ rtmp_video_url = rendition.find('./src').text
+ if 'error_not_available.swf' in rtmp_video_url:
+ raise ExtractorError(
+ '%s said: video is not available' % self.IE_NAME,
+ expected=True)
+ if rtmp_video_url.endswith('siteunavail.png'):
+ continue
+ formats.extend([{
+ 'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext,
+ 'url': rtmp_video_url,
+ 'format_id': '-'.join(filter(None, [
+ 'rtmp' if rtmp_video_url.startswith('rtmp') else None,
+ rendition.get('bitrate')])),
+ 'width': int(rendition.get('width')),
+ 'height': int(rendition.get('height')),
+ }])
+ except (KeyError, TypeError):
+ raise ExtractorError('Invalid rendition field.')
+ if formats:
+ self._sort_formats(formats)
+ return formats
+
+ def _extract_subtitles(self, mdoc, mtvn_id):
+ subtitles = {}
+ for transcript in mdoc.findall('.//transcript'):
+ if transcript.get('kind') != 'captions':
+ continue
+ lang = transcript.get('srclang')
+ for typographic in transcript.findall('./typographic'):
+ sub_src = typographic.get('src')
+ if not sub_src:
+ continue
+ ext = typographic.get('format')
+ if ext == 'cea-608':
+ ext = 'scc'
+ subtitles.setdefault(lang, []).append({
+ 'url': compat_str(sub_src),
+ 'ext': ext
+ })
+ return subtitles
+
+ def _get_video_info(self, itemdoc, use_hls=True):
+ uri = itemdoc.find('guid').text
+ video_id = self._id_from_uri(uri)
+ self.report_extraction(video_id)
+ content_el = itemdoc.find('%s/%s' % (_media_xml_tag('group'), _media_xml_tag('content')))
+ mediagen_url = self._remove_template_parameter(content_el.attrib['url'])
+ mediagen_url = mediagen_url.replace('device={device}', '')
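+ # Ask the mediagen endpoint for a specific delivery method: HLS or RTMP ('fms') renditions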
+ if 'acceptMethods' not in mediagen_url:
+ mediagen_url += '&' if '?' in mediagen_url else '?'
+ mediagen_url += 'acceptMethods='
+ mediagen_url += 'hls' if use_hls else 'fms'
+
+ mediagen_doc = self._download_xml(
+ mediagen_url, video_id, 'Downloading video urls', fatal=False)
+
+ if mediagen_doc is False:
+ return None
+
+ item = mediagen_doc.find('./video/item')
+ if item is not None and item.get('type') == 'text':
+ message = '%s returned error: ' % self.IE_NAME
+ if item.get('code') is not None:
+ message += '%s - ' % item.get('code')
+ message += item.text
+ raise ExtractorError(message, expected=True)
+
+ description = strip_or_none(xpath_text(itemdoc, 'description'))
+
+ timestamp = timeconvert(xpath_text(itemdoc, 'pubDate'))
+
+ title_el = find_xpath_attr(
+ itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:video_title')
+ if title_el is None:
+ title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title'))
+ if title_el is None:
+ title_el = itemdoc.find(compat_xpath('.//title'))
+ if title_el.text is None:
+ title_el = None
+
+ title = title_el.text if title_el is not None else None
+ if title is None:
+ raise ExtractorError('Could not find video title')
+ title = title.strip()
+
+ # This is a short id that's used in the webpage urls
+ mtvn_id = None
+ mtvn_id_node = find_xpath_attr(itemdoc, './/{http://search.yahoo.com/mrss/}category',
+ 'scheme', 'urn:mtvn:id')
+ if mtvn_id_node is not None:
+ mtvn_id = mtvn_id_node.text
+
+ formats = self._extract_video_formats(mediagen_doc, mtvn_id, video_id)
+
+ # Some parts of the complete video may be missing (e.g. missing Act 3 in
+ # http://www.southpark.de/alle-episoden/s14e01-sexual-healing)
+ if not formats:
+ return None
+
+ self._sort_formats(formats)
+
+ return {
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': self._extract_subtitles(mediagen_doc, mtvn_id),
+ 'id': video_id,
+ 'thumbnail': self._get_thumbnail_url(uri, itemdoc),
+ 'description': description,
+ 'duration': float_or_none(content_el.attrib.get('duration')),
+ 'timestamp': timestamp,
+ }
+
+ def _get_feed_query(self, uri):
+ data = {'uri': uri}
+ if self._LANG:
+ data['lang'] = self._LANG
+ return data
+
+ def _get_videos_info(self, uri, use_hls=True):
+ video_id = self._id_from_uri(uri)
+ feed_url = self._get_feed_url(uri)
+ info_url = update_url_query(feed_url, self._get_feed_query(uri))
+ return self._get_videos_info_from_url(info_url, video_id, use_hls)
+
+ def _get_videos_info_from_url(self, url, video_id, use_hls=True):
+ idoc = self._download_xml(
+ url, video_id,
+ 'Downloading info', transform_source=fix_xml_ampersands)
+
+ title = xpath_text(idoc, './channel/title')
+ description = xpath_text(idoc, './channel/description')
+
+ entries = []
+ for item in idoc.findall('.//item'):
+ info = self._get_video_info(item, use_hls)
+ if info:
+ entries.append(info)
+
+ return self.playlist_result(
+ entries, playlist_title=title, playlist_description=description)
+
+ def _extract_triforce_mgid(self, webpage, data_zone=None, video_id=None):
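+ # Some pages ship a 'triforce' manifest; its promo data zone points to a JSON feed whose result id is the mgid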
+ triforce_feed = self._parse_json(self._search_regex(
+ r'triforceManifestFeed\s*=\s*({.+?})\s*;\s*\n', webpage,
+ 'triforce feed', default='{}'), video_id, fatal=False)
+
+ data_zone = self._search_regex(
+ r'data-zone=(["\'])(?P<zone>.+?_lc_promo.*?)\1', webpage,
+ 'data zone', default=data_zone, group='zone')
+
+ feed_url = try_get(
+ triforce_feed, lambda x: x['manifest']['zones'][data_zone]['feed'],
+ compat_str)
+ if not feed_url:
+ return
+
+ feed = self._download_json(feed_url, video_id, fatal=False)
+ if not feed:
+ return
+
+ return try_get(feed, lambda x: x['result']['data']['id'], compat_str)
+
+ def _extract_mgid(self, webpage):
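+ # Try several sources in turn: the og:video URL, data-mgid/swfobject embeds, the sm4:video:embed meta tag, then the triforce feed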
+ try:
+ # the url can be http://media.mtvnservices.com/fb/{mgid}.swf
+ # or http://media.mtvnservices.com/{mgid}
+ og_url = self._og_search_video_url(webpage)
+ mgid = url_basename(og_url)
+ if mgid.endswith('.swf'):
+ mgid = mgid[:-4]
+ except RegexNotFoundError:
+ mgid = None
+
+ if mgid is None or ':' not in mgid:
+ mgid = self._search_regex(
+ [r'data-mgid="(.*?)"', r'swfobject\.embedSWF\(".*?(mgid:.*?)"'],
+ webpage, 'mgid', default=None)
+
+ if not mgid:
+ sm4_embed = self._html_search_meta(
+ 'sm4:video:embed', webpage, 'sm4 embed', default='')
+ mgid = self._search_regex(
+ r'embed/(mgid:.+?)["\'&?/]', sm4_embed, 'mgid', default=None)
+
+ if not mgid:
+ mgid = self._extract_triforce_mgid(webpage)
+
+ return mgid
+
+ def _real_extract(self, url):
+ title = url_basename(url)
+ webpage = self._download_webpage(url, title)
+ mgid = self._extract_mgid(webpage)
+ videos_info = self._get_videos_info(mgid)
+ return videos_info
+
+
+class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtvservices:embedded'
+ _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
+
+ _TEST = {
+ # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
+ 'url': 'http://media.mtvnservices.com/embed/mgid:uma:video:mtv.com:1043906/cp~vid%3D1043906%26uri%3Dmgid%3Auma%3Avideo%3Amtv.com%3A1043906',
+ 'md5': 'cb349b21a7897164cede95bd7bf3fbb9',
+ 'info_dict': {
+ 'id': '1043906',
+ 'ext': 'mp4',
+ 'title': 'Peter Dinklage Sums Up \'Game Of Thrones\' In 45 Seconds',
+ 'description': '"Sexy sexy sexy, stabby stabby stabby, beautiful language," says Peter Dinklage as he tries summarizing "Game of Thrones" in under a minute.',
+ 'timestamp': 1400126400,
+ 'upload_date': '20140515',
+ },
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _get_feed_url(self, uri):
+ video_id = self._id_from_uri(uri)
+ config = self._download_json(
+ 'http://media.mtvnservices.com/pmt/e1/access/index.html?uri=%s&configtype=edge' % uri, video_id)
+ return self._remove_template_parameter(config['feedWithQueryParams'])
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ mgid = mobj.group('mgid')
+ return self._get_videos_info(mgid)
+
+
+class MTVIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.com/(?:video-clips|(?:full-)?episodes)/(?P<id>[^/?#.]+)'
+ _FEED_URL = 'http://www.mtv.com/feeds/mrss/'
+
+ _TESTS = [{
+ 'url': 'http://www.mtv.com/video-clips/vl8qof/unlocking-the-truth-trailer',
+ 'md5': '1edbcdf1e7628e414a8c5dcebca3d32b',
+ 'info_dict': {
+ 'id': '5e14040d-18a4-47c4-a582-43ff602de88e',
+ 'ext': 'mp4',
+ 'title': 'Unlocking The Truth|July 18, 2016|1|101|Trailer',
+ 'description': '"Unlocking the Truth" premieres August 17th at 11/10c.',
+ 'timestamp': 1468846800,
+ 'upload_date': '20160718',
+ },
+ }, {
+ 'url': 'http://www.mtv.com/full-episodes/94tujl/unlocking-the-truth-gates-of-hell-season-1-ep-101',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mtv.com/episodes/g8xu7q/teen-mom-2-breaking-the-wall-season-7-ep-713',
+ 'only_matching': True,
+ }]
+
+
+class MTVJapanIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtvjapan'
+ _VALID_URL = r'https?://(?:www\.)?mtvjapan\.com/videos/(?P<id>[0-9a-z]+)'
+
+ _TEST = {
+ 'url': 'http://www.mtvjapan.com/videos/prayht/fresh-info-cadillac-escalade',
+ 'info_dict': {
+ 'id': 'bc01da03-6fe5-4284-8880-f291f4e368f5',
+ 'ext': 'mp4',
+ 'title': '【Fresh Info】Cadillac ESCALADE Sport Edition',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+ _GEO_COUNTRIES = ['JP']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtvjapan.com',
+ 'mgid': uri,
+ }
+
+
+class MTVVideoIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv:video'
+ _VALID_URL = r'''(?x)^https?://
+ (?:(?:www\.)?mtv\.com/videos/.+?/(?P<videoid>[0-9]+)/[^/]+$|
+ m\.mtv\.com/videos/video\.rbml\?.*?id=(?P<mgid>[^&]+))'''
+
+ _FEED_URL = 'http://www.mtv.com/player/embed/AS3/rss/'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml',
+ 'md5': '850f3f143316b1e71fa56a4edfd6e0f8',
+ 'info_dict': {
+ 'id': '853555',
+ 'ext': 'mp4',
+ 'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',
+ 'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',
+ 'timestamp': 1352610000,
+ 'upload_date': '20121111',
+ },
+ },
+ ]
+
+ def _get_thumbnail_url(self, uri, itemdoc):
+ return 'http://mtv.mtvnimages.com/uri/' + uri
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('videoid')
+ uri = mobj.groupdict().get('mgid')
+ if uri is None:
+ webpage = self._download_webpage(url, video_id)
+
+ # Some videos come from Vevo.com
+ m_vevo = re.search(
+ r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)
+ if m_vevo:
+ vevo_id = m_vevo.group(1)
+ self.to_screen('Vevo video detected: %s' % vevo_id)
+ return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
+
+ uri = self._html_search_regex(r'/uri/(.*?)\?', webpage, 'uri')
+ return self._get_videos_info(uri)
+
+
+class MTVDEIE(MTVServicesInfoExtractor):
+ IE_NAME = 'mtv.de'
+ _VALID_URL = r'https?://(?:www\.)?mtv\.de/(?:musik/videoclips|folgen|news)/(?P<id>[0-9a-z]+)'
+ _TESTS = [{
+ 'url': 'http://www.mtv.de/musik/videoclips/2gpnv7/Traum',
+ 'info_dict': {
+ 'id': 'd5d472bc-f5b7-11e5-bffd-a4badb20dab5',
+ 'ext': 'mp4',
+ 'title': 'Traum',
+ 'description': 'Traum',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Blocked at Travis CI',
+ }, {
+ # mediagen URL without query (e.g. http://videos.mtvnn.com/mediagen/e865da714c166d18d6f80893195fcb97)
+ 'url': 'http://www.mtv.de/folgen/6b1ylu/teen-mom-2-enthuellungen-S5-F1',
+ 'info_dict': {
+ 'id': '1e5a878b-31c5-11e7-a442-0e40cf2fc285',
+ 'ext': 'mp4',
+ 'title': 'Teen Mom 2',
+ 'description': 'md5:dc65e357ef7e1085ed53e9e9d83146a7',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Blocked at Travis CI',
+ }, {
+ 'url': 'http://www.mtv.de/news/glolix/77491-mtv-movies-spotlight--pixels--teil-3',
+ 'info_dict': {
+ 'id': 'local_playlist-4e760566473c4c8c5344',
+ 'ext': 'mp4',
+ 'title': 'Article_mtv-movies-spotlight-pixels-teil-3_short-clips_part1',
+ 'description': 'MTV Movies Supercut',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Das Video kann zur Zeit nicht abgespielt werden.',
+ }]
+ _GEO_COUNTRIES = ['DE']
+ _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'
+
+ def _get_feed_query(self, uri):
+ return {
+ 'arcEp': 'mtv.de',
+ 'mgid': uri,
+ }
diff --git a/youtube_dl/extractor/muenchentv.py b/youtube_dlc/extractor/muenchentv.py
index 2cc2bf229..2cc2bf229 100644
--- a/youtube_dl/extractor/muenchentv.py
+++ b/youtube_dlc/extractor/muenchentv.py
diff --git a/youtube_dl/extractor/mwave.py b/youtube_dlc/extractor/mwave.py
index a67276596..a67276596 100644
--- a/youtube_dl/extractor/mwave.py
+++ b/youtube_dlc/extractor/mwave.py
diff --git a/youtube_dl/extractor/mychannels.py b/youtube_dlc/extractor/mychannels.py
index b1ffe7848..b1ffe7848 100644
--- a/youtube_dl/extractor/mychannels.py
+++ b/youtube_dlc/extractor/mychannels.py
diff --git a/youtube_dl/extractor/myspace.py b/youtube_dlc/extractor/myspace.py
index e164d5940..e164d5940 100644
--- a/youtube_dl/extractor/myspace.py
+++ b/youtube_dlc/extractor/myspace.py
diff --git a/youtube_dlc/extractor/myspass.py b/youtube_dlc/extractor/myspass.py
new file mode 100644
index 000000000..db7ebc94c
--- /dev/null
+++ b/youtube_dlc/extractor/myspass.py
@@ -0,0 +1,56 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_duration,
+ xpath_text,
+)
+
+
+class MySpassIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?myspass\.de/([^/]+/)*(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.myspass.de/myspass/shows/tvshows/absolute-mehrheit/Absolute-Mehrheit-vom-17022013-Die-Highlights-Teil-2--/11741/',
+ 'md5': '0b49f4844a068f8b33f4b7c88405862b',
+ 'info_dict': {
+ 'id': '11741',
+ 'ext': 'mp4',
+ 'description': 'Wer kann in die Fußstapfen von Wolfgang Kubicki treten und die Mehrheit der Zuschauer hinter sich versammeln? Wird vielleicht sogar die Absolute Mehrheit geknackt und der Jackpot von 200.000 Euro mit nach Hause genommen?',
+ 'title': '17.02.2013 - Die Highlights, Teil 2',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ metadata = self._download_xml(
+ 'http://www.myspass.de/myspass/includes/apps/video/getvideometadataxml.php?id=' + video_id,
+ video_id)
+
+ title = xpath_text(metadata, 'title', fatal=True)
+ video_url = xpath_text(metadata, 'url_flv', 'download url', True)
+ video_id_int = int(video_id)
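+ # Path segments larger than the video id appear to be the real values multiplied by the id; divide them back down (empirical)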
+ for group in re.search(r'/myspass2009/\d+/(\d+)/(\d+)/(\d+)/', video_url).groups():
+ group_int = int(group)
+ if group_int > video_id_int:
+ video_url = video_url.replace(
+ group, compat_str(group_int // video_id_int))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': xpath_text(metadata, 'imagePreview'),
+ 'description': xpath_text(metadata, 'description'),
+ 'duration': parse_duration(xpath_text(metadata, 'duration')),
+ 'series': xpath_text(metadata, 'format'),
+ 'season_number': int_or_none(xpath_text(metadata, 'season')),
+ 'season_id': xpath_text(metadata, 'season_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(xpath_text(metadata, 'episode')),
+ }
diff --git a/youtube_dl/extractor/myvi.py b/youtube_dlc/extractor/myvi.py
index 75d286365..75d286365 100644
--- a/youtube_dl/extractor/myvi.py
+++ b/youtube_dlc/extractor/myvi.py
diff --git a/youtube_dl/extractor/myvidster.py b/youtube_dlc/extractor/myvidster.py
index 2117d302d..2117d302d 100644
--- a/youtube_dl/extractor/myvidster.py
+++ b/youtube_dlc/extractor/myvidster.py
diff --git a/youtube_dl/extractor/nationalgeographic.py b/youtube_dlc/extractor/nationalgeographic.py
index ee12e2b47..ee12e2b47 100644
--- a/youtube_dl/extractor/nationalgeographic.py
+++ b/youtube_dlc/extractor/nationalgeographic.py
diff --git a/youtube_dlc/extractor/naver.py b/youtube_dlc/extractor/naver.py
new file mode 100644
index 000000000..61fc59126
--- /dev/null
+++ b/youtube_dlc/extractor/naver.py
@@ -0,0 +1,166 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ try_get,
+ update_url_query,
+)
+
+
+class NaverBaseIE(InfoExtractor):
+ _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'
+
+ def _extract_video_info(self, video_id, vid, key):
+ video_data = self._download_json(
+ 'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
+ video_id, query={
+ 'key': key,
+ })
+ meta = video_data['meta']
+ title = meta['subject']
+ formats = []
+ get_list = lambda x: try_get(video_data, lambda y: y[x + 's']['list'], list) or []
+
+ def extract_formats(streams, stream_type, query={}):
+ for stream in streams:
+ stream_url = stream.get('source')
+ if not stream_url:
+ continue
+ stream_url = update_url_query(stream_url, query)
+ encoding_option = stream.get('encodingOption', {})
+ bitrate = stream.get('bitrate', {})
+ formats.append({
+ 'format_id': '%s_%s' % (stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))),
+ 'url': stream_url,
+ 'width': int_or_none(encoding_option.get('width')),
+ 'height': int_or_none(encoding_option.get('height')),
+ 'vbr': int_or_none(bitrate.get('video')),
+ 'abr': int_or_none(bitrate.get('audio')),
+ 'filesize': int_or_none(stream.get('size')),
+ 'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
+ })
+
+ extract_formats(get_list('video'), 'H264')
+ for stream_set in video_data.get('streams', []):
+ query = {}
+ for param in stream_set.get('keys', []):
+ query[param['name']] = param['value']
+ stream_type = stream_set.get('type')
+ videos = stream_set.get('videos')
+ if videos:
+ extract_formats(videos, stream_type, query)
+ elif stream_type == 'HLS':
+ stream_url = stream_set.get('source')
+ if not stream_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ update_url_query(stream_url, query), video_id,
+ 'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))
+ self._sort_formats(formats)
+
+ replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)
+
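+ # Captions referenced with a ttml/vtt extension are available in both formats, so offer both variants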
+ def get_subs(caption_url):
+ if re.search(self._CAPTION_EXT_RE, caption_url):
+ return [{
+ 'url': replace_ext(caption_url, 'ttml'),
+ }, {
+ 'url': replace_ext(caption_url, 'vtt'),
+ }]
+ else:
+ return [{'url': caption_url}]
+
+ automatic_captions = {}
+ subtitles = {}
+ for caption in get_list('caption'):
+ caption_url = caption.get('source')
+ if not caption_url:
+ continue
+ sub_dict = automatic_captions if caption.get('type') == 'auto' else subtitles
+ sub_dict.setdefault(dict_get(caption, ('locale', 'language')), []).extend(get_subs(caption_url))
+
+ user = meta.get('user', {})
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions,
+ 'thumbnail': try_get(meta, lambda x: x['cover']['source']),
+ 'view_count': int_or_none(meta.get('count')),
+ 'uploader_id': user.get('id'),
+ 'uploader': user.get('name'),
+ 'uploader_url': user.get('url'),
+ }
+
+
+class NaverIE(NaverBaseIE):
+ _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
+ _GEO_BYPASS = False
+ _TESTS = [{
+ 'url': 'http://tv.naver.com/v/81652',
+ 'info_dict': {
+ 'id': '81652',
+ 'ext': 'mp4',
+ 'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
+ 'description': '메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
+ 'timestamp': 1378200754,
+ 'upload_date': '20130903',
+ 'uploader': '메가스터디, 합격불변의 법칙',
+ 'uploader_id': 'megastudy',
+ },
+ }, {
+ 'url': 'http://tv.naver.com/v/395837',
+ 'md5': '8a38e35354d26a17f73f4e90094febd3',
+ 'info_dict': {
+ 'id': '395837',
+ 'ext': 'mp4',
+ 'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
+ 'description': 'md5:eb6aca9d457b922e43860a2a2b1984d3',
+ 'timestamp': 1432030253,
+ 'upload_date': '20150519',
+ 'uploader': '4가지쇼 시즌2',
+ 'uploader_id': 'wrappinguser29',
+ },
+ 'skip': 'Georestricted',
+ }, {
+ 'url': 'http://tvcast.naver.com/v/81652',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ content = self._download_json(
+ 'https://tv.naver.com/api/json/v/' + video_id,
+ video_id, headers=self.geo_verification_headers())
+ player_info_json = content.get('playerInfoJson') or {}
+ current_clip = player_info_json.get('currentClip') or {}
+
+ vid = current_clip.get('videoId')
+ in_key = current_clip.get('inKey')
+
+ if not vid or not in_key:
+ player_auth = try_get(player_info_json, lambda x: x['playerOption']['auth'])
+ if player_auth == 'notCountry':
+ self.raise_geo_restricted(countries=['KR'])
+ elif player_auth == 'notLogin':
+ self.raise_login_required()
+ raise ExtractorError('Unable to extract vid and key')
+ info = self._extract_video_info(video_id, vid, in_key)
+ info.update({
+ 'description': clean_html(current_clip.get('description')),
+ 'timestamp': int_or_none(current_clip.get('firstExposureTime'), 1000),
+ 'duration': parse_duration(current_clip.get('displayPlayTime')),
+ 'like_count': int_or_none(current_clip.get('recommendPoint')),
+ 'age_limit': 19 if current_clip.get('adult') else None,
+ })
+ return info
diff --git a/youtube_dl/extractor/nba.py b/youtube_dlc/extractor/nba.py
index be295a7a3..be295a7a3 100644
--- a/youtube_dl/extractor/nba.py
+++ b/youtube_dlc/extractor/nba.py
diff --git a/youtube_dlc/extractor/nbc.py b/youtube_dlc/extractor/nbc.py
new file mode 100644
index 000000000..6f3cb3003
--- /dev/null
+++ b/youtube_dlc/extractor/nbc.py
@@ -0,0 +1,541 @@
+from __future__ import unicode_literals
+
+import base64
+import json
+import re
+
+from .common import InfoExtractor
+from .theplatform import ThePlatformIE
+from .adobepass import AdobePassIE
+from ..compat import compat_urllib_parse_unquote
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ parse_duration,
+ smuggle_url,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+)
+
+
+class NBCIE(AdobePassIE):
+ _VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>n?\d+))'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
+ 'info_dict': {
+ 'id': '2848237',
+ 'ext': 'mp4',
+ 'title': 'Jimmy Fallon Surprises Fans at Ben & Jerry\'s',
+ 'description': 'Jimmy gives out free scoops of his new "Tonight Dough" ice cream flavor by surprising customers at the Ben & Jerry\'s scoop shop.',
+ 'timestamp': 1424246400,
+ 'upload_date': '20150218',
+ 'uploader': 'NBCU-COM',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
+ 'info_dict': {
+ 'id': '2832821',
+ 'ext': 'mp4',
+ 'title': 'Star Wars Teaser',
+ 'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
+ 'timestamp': 1417852800,
+ 'upload_date': '20141206',
+ 'uploader': 'NBCU-COM',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Only works from US',
+ },
+ {
+ # HLS streams require the 'hdnea3' cookie
+ 'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
+ 'info_dict': {
+ 'id': '101528f5a9e8127b107e98c5e6ce4638',
+ 'ext': 'mp4',
+ 'title': 'Goliath',
+ 'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
+ 'timestamp': 1237100400,
+ 'upload_date': '20090315',
+ 'uploader': 'NBCU-COM',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Only works from US',
+ },
+ {
+ 'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
+ 'only_matching': True,
+ },
+ {
+ # Percent escaped url
+ 'url': 'https://www.nbc.com/up-all-night/video/day-after-valentine%27s-day/n2189',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ permalink, video_id = re.match(self._VALID_URL, url).groups()
+ permalink = 'http' + compat_urllib_parse_unquote(permalink)
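+ # Metadata is served by NBC's GraphQL endpoint; only the VideoPageData fields used below are requested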
+ video_data = self._download_json(
+ 'https://friendship.nbc.co/v2/graphql', video_id, query={
+ 'query': '''query bonanzaPage(
+ $app: NBCUBrands! = nbc
+ $name: String!
+ $oneApp: Boolean
+ $platform: SupportedPlatforms! = web
+ $type: EntityPageType! = VIDEO
+ $userId: String!
+) {
+ bonanzaPage(
+ app: $app
+ name: $name
+ oneApp: $oneApp
+ platform: $platform
+ type: $type
+ userId: $userId
+ ) {
+ metadata {
+ ... on VideoPageData {
+ description
+ episodeNumber
+ keywords
+ locked
+ mpxAccountId
+ mpxGuid
+ rating
+ resourceId
+ seasonNumber
+ secondaryTitle
+ seriesShortTitle
+ }
+ }
+ }
+}''',
+ 'variables': json.dumps({
+ 'name': permalink,
+ 'oneApp': True,
+ 'userId': '0',
+ }),
+ })['data']['bonanzaPage']['metadata']
+ query = {
+ 'mbr': 'true',
+ 'manifest': 'm3u',
+ }
+ video_id = video_data['mpxGuid']
+ title = video_data['secondaryTitle']
+ if video_data.get('locked'):
+ resource = self._get_mvpd_resource(
+ video_data.get('resourceId') or 'nbcentertainment',
+ title, video_id, video_data.get('rating'))
+ query['auth'] = self._extract_mvpd_auth(
+ url, video_id, 'nbcentertainment', resource)
+ theplatform_url = smuggle_url(update_url_query(
+ 'http://link.theplatform.com/s/NnzsPC/media/guid/%s/%s' % (video_data.get('mpxAccountId') or '2410887629', video_id),
+ query), {'force_smil_url': True})
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': title,
+ 'url': theplatform_url,
+ 'description': video_data.get('description'),
+ 'tags': video_data.get('keywords'),
+ 'season_number': int_or_none(video_data.get('seasonNumber')),
+ 'episode_number': int_or_none(video_data.get('episodeNumber')),
+ 'episode': title,
+ 'series': video_data.get('seriesShortTitle'),
+ 'ie_key': 'ThePlatform',
+ }
+
+
+class NBCSportsVPlayerIE(InfoExtractor):
+ _VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
+
+ _TESTS = [{
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/9CsDKds0kvHI',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'mp4',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ 'timestamp': 1426270238,
+ 'upload_date': '20150313',
+ 'uploader': 'NBCU-SPORTS',
+ }
+ }, {
+ 'url': 'https://vplayer.nbcsports.com/p/BxmELC/nbcsports_embed/select/media/_hqLjQ95yx8Z',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ iframe_m = re.search(
+ r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
+ if iframe_m:
+ return iframe_m.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ theplatform_url = self._og_search_video_url(webpage).replace(
+ 'vplayer.nbcsports.com', 'player.theplatform.com')
+ return self.url_result(theplatform_url, 'ThePlatform')
+
+
+class NBCSportsIE(InfoExtractor):
+ # The site sometimes emits a doubled slash after the domain, hence the //? in the pattern
+ _VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
+
+ _TEST = {
+ 'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
+ 'info_dict': {
+ 'id': 'PHJSaFWbrTY9',
+ 'ext': 'flv',
+ 'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
+ 'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
+ 'uploader': 'NBCU-SPORTS',
+ 'upload_date': '20150330',
+ 'timestamp': 1427726529,
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ return self.url_result(
+ NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
+
+
+class NBCSportsStreamIE(AdobePassIE):
+ _VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
+ 'info_dict': {
+ 'id': '206559',
+ 'ext': 'mp4',
+ 'title': 'Amgen Tour of California Women\'s Recap',
+ 'description': 'md5:66520066b3b5281ada7698d0ea2aa894',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'Requires Adobe Pass Authentication',
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ live_source = self._download_json(
+ 'http://stream.nbcsports.com/data/live_sources_%s.json' % video_id,
+ video_id)
+ video_source = live_source['videoSources'][0]
+ title = video_source['title']
+ source_url = None
+ for k in ('source', 'msl4source', 'iossource', 'hlsv4'):
+ sk = k + 'Url'
+ source_url = video_source.get(sk) or video_source.get(sk + 'Alt')
+ if source_url:
+ break
+ else:
+ source_url = video_source['ottStreamUrl']
+ is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
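+ # Exchange the Adobe Pass token for a tokenized CDN URL before fetching the HLS manifest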
+ resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
+ token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
+ tokenized_url = self._download_json(
+ 'https://token.playmakerservices.com/cdn',
+ video_id, data=json.dumps({
+ 'requestorId': 'nbcsports',
+ 'pid': video_id,
+ 'application': 'NBCSports',
+ 'version': 'v1',
+ 'platform': 'desktop',
+ 'cdn': 'akamai',
+ 'url': video_source['sourceUrl'],
+ 'token': base64.b64encode(token.encode()).decode(),
+ 'resourceId': base64.b64encode(resource.encode()).decode(),
+ }).encode())['tokenizedUrl']
+ formats = self._extract_m3u8_formats(tokenized_url, video_id, 'mp4')
+ self._sort_formats(formats)
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'description': live_source.get('description'),
+ 'formats': formats,
+ 'is_live': is_live,
+ }
+
+
+class CSNNEIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?csnne\.com/video/(?P<id>[0-9a-z-]+)'
+
+ _TEST = {
+ 'url': 'http://www.csnne.com/video/snc-evening-update-wright-named-red-sox-no-5-starter',
+ 'info_dict': {
+ 'id': 'yvBLLUgQ8WU0',
+ 'ext': 'mp4',
+ 'title': 'SNC evening update: Wright named Red Sox\' No. 5 starter.',
+ 'description': 'md5:1753cfee40d9352b19b4c9b3e589b9e3',
+ 'timestamp': 1459369979,
+ 'upload_date': '20160330',
+ 'uploader': 'NBCU-SPORTS',
+ }
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'ThePlatform',
+ 'url': self._html_search_meta('twitter:player:stream', webpage),
+ 'display_id': display_id,
+ }
+
+
+class NBCNewsIE(ThePlatformIE):
+ _VALID_URL = r'(?x)https?://(?:www\.)?(?:nbcnews|today|msnbc)\.com/([^/]+/)*(?:.*-)?(?P<id>[^/?]+)'
+
+ _TESTS = [
+ {
+ 'url': 'http://www.nbcnews.com/watch/nbcnews-com/how-twitter-reacted-to-the-snowden-interview-269389891880',
+ 'md5': 'cf4bc9e6ce0130f00f545d80ecedd4bf',
+ 'info_dict': {
+ 'id': '269389891880',
+ 'ext': 'mp4',
+ 'title': 'How Twitter Reacted To The Snowden Interview',
+ 'description': 'md5:65a0bd5d76fe114f3c2727aa3a81fe64',
+ 'timestamp': 1401363060,
+ 'upload_date': '20140529',
+ },
+ },
+ {
+ 'url': 'http://www.nbcnews.com/feature/dateline-full-episodes/full-episode-family-business-n285156',
+ 'md5': 'fdbf39ab73a72df5896b6234ff98518a',
+ 'info_dict': {
+ 'id': '529953347624',
+ 'ext': 'mp4',
+ 'title': 'FULL EPISODE: Family Business',
+ 'description': 'md5:757988edbaae9d7be1d585eb5d55cc04',
+ },
+ 'skip': 'This page is unavailable.',
+ },
+ {
+ 'url': 'http://www.nbcnews.com/nightly-news/video/nightly-news-with-brian-williams-full-broadcast-february-4-394064451844',
+ 'md5': '8eb831eca25bfa7d25ddd83e85946548',
+ 'info_dict': {
+ 'id': '394064451844',
+ 'ext': 'mp4',
+ 'title': 'Nightly News with Brian Williams Full Broadcast (February 4)',
+ 'description': 'md5:1c10c1eccbe84a26e5debb4381e2d3c5',
+ 'timestamp': 1423104900,
+ 'upload_date': '20150205',
+ },
+ },
+ {
+ 'url': 'http://www.nbcnews.com/business/autos/volkswagen-11-million-vehicles-could-have-suspect-software-emissions-scandal-n431456',
+ 'md5': '4a8c4cec9e1ded51060bdda36ff0a5c0',
+ 'info_dict': {
+ 'id': 'n431456',
+ 'ext': 'mp4',
+ 'title': "Volkswagen U.S. Chief: We 'Totally Screwed Up'",
+ 'description': 'md5:d22d1281a24f22ea0880741bb4dd6301',
+ 'upload_date': '20150922',
+ 'timestamp': 1442917800,
+ },
+ },
+ {
+ 'url': 'http://www.today.com/video/see-the-aurora-borealis-from-space-in-stunning-new-nasa-video-669831235788',
+ 'md5': '118d7ca3f0bea6534f119c68ef539f71',
+ 'info_dict': {
+ 'id': '669831235788',
+ 'ext': 'mp4',
+ 'title': 'See the aurora borealis from space in stunning new NASA video',
+ 'description': 'md5:74752b7358afb99939c5f8bb2d1d04b1',
+ 'upload_date': '20160420',
+ 'timestamp': 1461152093,
+ },
+ },
+ {
+ 'url': 'http://www.msnbc.com/all-in-with-chris-hayes/watch/the-chaotic-gop-immigration-vote-314487875924',
+ 'md5': '6d236bf4f3dddc226633ce6e2c3f814d',
+ 'info_dict': {
+ 'id': '314487875924',
+ 'ext': 'mp4',
+ 'title': 'The chaotic GOP immigration vote',
+ 'description': 'The Republican House votes on a border bill that has no chance of getting through the Senate or signed by the President and is drawing criticism from all sides.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1406937606,
+ 'upload_date': '20140802',
+ },
+ },
+ {
+ 'url': 'http://www.nbcnews.com/watch/dateline/full-episode--deadly-betrayal-386250819952',
+ 'only_matching': True,
+ },
+ {
+ # From http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html
+ 'url': 'http://www.nbcnews.com/widget/video-embed/701714499682',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
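+ # The page state lives in window.__data; the clip is either video.current or the article's primary media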
+ data = self._parse_json(self._search_regex(
+ r'window\.__data\s*=\s*({.+});', webpage,
+ 'bootstrap json'), video_id, js_to_json)
+ video_data = try_get(data, lambda x: x['video']['current'], dict)
+ if not video_data:
+ video_data = data['article']['content'][0]['primaryMedia']['video']
+ title = video_data['headline']['primary']
+
+ formats = []
+ for va in video_data.get('videoAssets', []):
+ public_url = va.get('publicUrl')
+ if not public_url:
+ continue
+ if '://link.theplatform.com/' in public_url:
+ public_url = update_url_query(public_url, {'format': 'redirect'})
+ format_id = va.get('format')
+ if format_id == 'M3U':
+ formats.extend(self._extract_m3u8_formats(
+ public_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False))
+ continue
+ tbr = int_or_none(va.get('bitrate'), 1000)
+ if tbr:
+ format_id += '-%d' % tbr
+ formats.append({
+ 'format_id': format_id,
+ 'url': public_url,
+ 'width': int_or_none(va.get('width')),
+ 'height': int_or_none(va.get('height')),
+ 'tbr': tbr,
+ 'ext': 'mp4',
+ })
+ self._sort_formats(formats)
+
+ subtitles = {}
+ closed_captioning = video_data.get('closedCaptioning')
+ if closed_captioning:
+ for cc_url in closed_captioning.values():
+ if not cc_url:
+ continue
+ subtitles.setdefault('en', []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': try_get(video_data, lambda x: x['description']['primary']),
+ 'thumbnail': try_get(video_data, lambda x: x['primaryImage']['url']['primary']),
+ 'duration': parse_duration(video_data.get('duration')),
+ 'timestamp': unified_timestamp(video_data.get('datePublished')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class NBCOlympicsIE(InfoExtractor):
+ IE_NAME = 'nbcolympics'
+ _VALID_URL = r'https?://www\.nbcolympics\.com/video/(?P<id>[a-z-]+)'
+
+ _TEST = {
+ # Geo-restricted to US
+ 'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+ 'md5': '54fecf846d05429fbaa18af557ee523a',
+ 'info_dict': {
+ 'id': 'WjTBzDXx5AUq',
+ 'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
+ 'ext': 'mp4',
+ 'title': 'Rose\'s son Leo was in tears after his dad won gold',
+ 'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
+ 'timestamp': 1471274964,
+ 'upload_date': '20160815',
+ 'uploader': 'NBCU-SPORTS',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ drupal_settings = self._parse_json(self._search_regex(
+ r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
+ webpage, 'drupal settings'), display_id)
+
+ iframe_url = drupal_settings['vod']['iframe_url']
+ theplatform_url = iframe_url.replace(
+ 'vplayer.nbcolympics.com', 'player.theplatform.com')
+
+ return {
+ '_type': 'url_transparent',
+ 'url': theplatform_url,
+ 'ie_key': ThePlatformIE.ie_key(),
+ 'display_id': display_id,
+ }
+
+
+class NBCOlympicsStreamIE(AdobePassIE):
+ IE_NAME = 'nbcolympics:stream'
+ _VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
+ _TEST = {
+ 'url': 'http://stream.nbcolympics.com/2018-winter-olympics-nbcsn-evening-feb-8',
+ 'info_dict': {
+ 'id': '203493',
+ 'ext': 'mp4',
+ 'title': 're:Curling, Alpine, Luge [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+ _DATA_URL_TEMPLATE = 'http://stream.nbcolympics.com/data/%s_%s.json'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ pid = self._search_regex(r'pid\s*=\s*(\d+);', webpage, 'pid')
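+ # The page assembles the Adobe Pass resource string in JavaScript by concatenating the pid; redo that concatenation here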
+ resource = self._search_regex(
+ r"resource\s*=\s*'(.+)';", webpage,
+ 'resource').replace("' + pid + '", pid)
+ event_config = self._download_json(
+ self._DATA_URL_TEMPLATE % ('event_config', pid),
+ pid)['eventConfig']
+ title = self._live_title(event_config['eventTitle'])
+ source_url = self._download_json(
+ self._DATA_URL_TEMPLATE % ('live_sources', pid),
+ pid)['videoSources'][0]['sourceUrl']
+ media_token = self._extract_mvpd_auth(
+ url, pid, event_config.get('requestorId', 'NBCOlympics'), resource)
+ formats = self._extract_m3u8_formats(self._download_webpage(
+ 'http://sp.auth.adobe.com/tvs/v1/sign', pid, query={
+ 'cdn': 'akamai',
+ 'mediaToken': base64.b64encode(media_token.encode()),
+ 'resource': base64.b64encode(resource.encode()),
+ 'url': source_url,
+ }), pid, 'mp4')
+ self._sort_formats(formats)
+
+ return {
+ 'id': pid,
+ 'display_id': display_id,
+ 'title': title,
+ 'formats': formats,
+ 'is_live': True,
+ }
diff --git a/youtube_dlc/extractor/ndr.py b/youtube_dlc/extractor/ndr.py
new file mode 100644
index 000000000..2447c812e
--- /dev/null
+++ b/youtube_dlc/extractor/ndr.py
@@ -0,0 +1,402 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ merge_dicts,
+ parse_iso8601,
+ qualities,
+ try_get,
+ urljoin,
+)
+
+
+class NDRBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = next(group for group in mobj.groups() if group)
+ webpage = self._download_webpage(url, display_id)
+ return self._extract_embed(webpage, display_id)
+
+
+class NDRIE(NDRBaseIE):
+ IE_NAME = 'ndr'
+ IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
+ _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
+ _TESTS = [{
+ # httpVideo, same content id
+ 'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
+ 'md5': '6515bc255dc5c5f8c85bbc38e035a659',
+ 'info_dict': {
+ 'id': 'hafengeburtstag988',
+ 'display_id': 'Party-Poette-und-Parade',
+ 'ext': 'mp4',
+ 'title': 'Party, Pötte und Parade',
+ 'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1431108900,
+ 'upload_date': '20150510',
+ 'duration': 3498,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpVideo, different content id
+ 'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
+ 'md5': '1043ff203eab307f0c51702ec49e9a71',
+ 'info_dict': {
+ 'id': 'osna272',
+ 'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
+ 'ext': 'mp4',
+ 'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
+ 'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
+ 'uploader': 'ndrtv',
+ 'timestamp': 1442059200,
+ 'upload_date': '20150912',
+ 'duration': 510,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpAudio, same content id
+ 'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+ 'info_dict': {
+ 'id': 'audio51535',
+ 'display_id': 'La-Valette-entgeht-der-Hinrichtung',
+ 'ext': 'mp3',
+ 'title': 'La Valette entgeht der Hinrichtung',
+ 'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
+ 'uploader': 'ndrinfo',
+ 'timestamp': 1290626100,
+ 'upload_date': '20140729',
+ 'duration': 884,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_embed(self, webpage, display_id):
+ embed_url = self._html_search_meta(
+ 'embedURL', webpage, 'embed URL',
+ default=None) or self._search_regex(
+ r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'embed URL', group='url')
+ description = self._search_regex(
+ r'<p[^>]+itemprop="description">([^<]+)</p>',
+ webpage, 'description', default=None) or self._og_search_description(webpage)
+ timestamp = parse_iso8601(
+ self._search_regex(
+ r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="([^"]+)"',
+ webpage, 'upload date', default=None))
+ info = self._search_json_ld(webpage, display_id, default={})
+ return merge_dicts({
+ '_type': 'url_transparent',
+ 'url': embed_url,
+ 'display_id': display_id,
+ 'description': description,
+ 'timestamp': timestamp,
+ }, info)
+
+
+class NJoyIE(NDRBaseIE):
+ IE_NAME = 'njoy'
+ IE_DESC = 'N-JOY'
+ _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
+ _TESTS = [{
+ # httpVideo, same content id
+ 'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
+ 'md5': 'cb63be60cd6f9dd75218803146d8dc67',
+ 'info_dict': {
+ 'id': 'comedycontest2480',
+ 'display_id': 'Benaissa-beim-NDR-Comedy-Contest',
+ 'ext': 'mp4',
+ 'title': 'Benaissa beim NDR Comedy Contest',
+ 'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39',
+ 'uploader': 'ndrtv',
+ 'upload_date': '20141129',
+ 'duration': 654,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpVideo, different content id
+ 'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
+ 'md5': '417660fffa90e6df2fda19f1b40a64d8',
+ 'info_dict': {
+ 'id': 'dockville882',
+ 'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
+ 'ext': 'mp4',
+ 'title': '"Ich hab noch nie" mit Felix Jaehn',
+ 'description': 'md5:85dd312d53be1b99e1f998a16452a2f3',
+ 'uploader': 'njoy',
+ 'upload_date': '20150822',
+ 'duration': 211,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.n-joy.de/radio/webradio/morningshow209.html',
+ 'only_matching': True,
+ }]
+
+ def _extract_embed(self, webpage, display_id):
+ video_id = self._search_regex(
+ r'<iframe[^>]+id="pp_([\da-z]+)"', webpage, 'embed id')
+ description = self._search_regex(
+ r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
+ webpage, 'description', fatal=False)
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'NDREmbedBase',
+ 'url': 'ndr:%s' % video_id,
+ 'display_id': display_id,
+ 'description': description,
+ }
+
+
+class NDREmbedBaseIE(InfoExtractor):
+ IE_NAME = 'ndr:embed:base'
+ _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
+ _TESTS = [{
+ 'url': 'ndr:soundcheck3366',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/soundcheck3366-ppjson.json',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('id_s')
+
+ ppjson = self._download_json(
+ 'http://www.ndr.de/%s-ppjson.json' % video_id, video_id)
+
+ playlist = ppjson['playlist']
+
+ formats = []
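+ # Quality labels used by NDR, ordered from lowest to highest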
+ quality_key = qualities(('xs', 's', 'm', 'l', 'xl'))
+
+ for format_id, f in playlist.items():
+ src = f.get('src')
+ if not src:
+ continue
+ ext = determine_ext(src, None)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
+ f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, 'mp4', m3u8_id='hls',
+ entry_protocol='m3u8_native', fatal=False))
+ else:
+ quality = f.get('quality')
+ ff = {
+ 'url': src,
+ 'format_id': quality or format_id,
+ 'quality': quality_key(quality),
+ }
+ type_ = f.get('type')
+ if type_ and type_.split('/')[0] == 'audio':
+ ff['vcodec'] = 'none'
+ ff['ext'] = ext or 'mp3'
+ formats.append(ff)
+ self._sort_formats(formats)
+
+ config = playlist['config']
+
+ live = config.get('streamType') in ['httpVideoLive', 'httpAudioLive']
+ title = config['title']
+ if live:
+ title = self._live_title(title)
+ uploader = ppjson.get('config', {}).get('branding')
+ upload_date = ppjson.get('config', {}).get('publicationDate')
+ duration = int_or_none(config.get('duration'))
+
+ thumbnails = []
+ poster = try_get(config, lambda x: x['poster'], dict) or {}
+ for thumbnail_id, thumbnail in poster.items():
+ thumbnail_url = urljoin(url, thumbnail.get('src'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'id': thumbnail.get('quality') or thumbnail_id,
+ 'url': thumbnail_url,
+ 'preference': quality_key(thumbnail.get('quality')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'is_live': live,
+ 'uploader': uploader if uploader != '-' else None,
+ 'upload_date': upload_date[0:8] if upload_date else None,
+ 'duration': duration,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
+
+
+class NDREmbedIE(NDREmbedBaseIE):
+ IE_NAME = 'ndr:embed'
+ _VALID_URL = r'https?://(?:www\.)?ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)\.html'
+ _TESTS = [{
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
+ 'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
+ 'info_dict': {
+ 'id': 'ndraktuell28488',
+ 'ext': 'mp4',
+ 'title': 'Norddeutschland begrüßt Flüchtlinge',
+ 'is_live': False,
+ 'uploader': 'ndrtv',
+ 'upload_date': '20150907',
+ 'duration': 132,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
+ 'md5': '002085c44bae38802d94ae5802a36e78',
+ 'info_dict': {
+ 'id': 'soundcheck3366',
+ 'ext': 'mp4',
+ 'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen',
+ 'is_live': False,
+ 'uploader': 'ndr2',
+ 'upload_date': '20150912',
+ 'duration': 3554,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/info/audio51535-player.html',
+ 'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
+ 'info_dict': {
+ 'id': 'audio51535',
+ 'ext': 'mp3',
+ 'title': 'La Valette entgeht der Hinrichtung',
+ 'is_live': False,
+ 'uploader': 'ndrinfo',
+ 'upload_date': '20140729',
+ 'duration': 884,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html',
+ 'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c',
+ 'info_dict': {
+ 'id': 'visite11010',
+ 'ext': 'mp4',
+ 'title': 'Visite - die ganze Sendung',
+ 'is_live': False,
+ 'uploader': 'ndrtv',
+ 'upload_date': '20150902',
+ 'duration': 3525,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpVideoLive
+ 'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
+ 'info_dict': {
+ 'id': 'livestream217',
+ 'ext': 'flv',
+ 'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ 'upload_date': '20150910',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ndr.de/fernsehen/doku952-player.html',
+ 'only_matching': True,
+ }]
+
+
+class NJoyEmbedIE(NDREmbedBaseIE):
+ IE_NAME = 'njoy:embed'
+ _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
+ _TESTS = [{
+ # httpVideo
+ 'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
+ 'md5': '8483cbfe2320bd4d28a349d62d88bd74',
+ 'info_dict': {
+ 'id': 'doku948',
+ 'ext': 'mp4',
+ 'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
+ 'is_live': False,
+ 'upload_date': '20150807',
+ 'duration': 1011,
+ },
+ }, {
+ # httpAudio
+ 'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
+ 'md5': 'd989f80f28ac954430f7b8a48197188a',
+ 'info_dict': {
+ 'id': 'stefanrichter100',
+ 'ext': 'mp3',
+ 'title': 'Interview mit einem Augenzeugen',
+ 'is_live': False,
+ 'uploader': 'njoy',
+ 'upload_date': '20150909',
+ 'duration': 140,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # httpAudioLive, no explicit ext
+ 'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
+ 'info_dict': {
+ 'id': 'webradioweltweit100',
+ 'ext': 'mp3',
+ 'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
+ 'is_live': True,
+ 'uploader': 'njoy',
+ 'upload_date': '20150810',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html',
+ 'only_matching': True,
+ }]
diff --git a/youtube_dl/extractor/ndtv.py b/youtube_dlc/extractor/ndtv.py
index bc3eb9160..bc3eb9160 100644
--- a/youtube_dl/extractor/ndtv.py
+++ b/youtube_dlc/extractor/ndtv.py
diff --git a/youtube_dl/extractor/nerdcubed.py b/youtube_dlc/extractor/nerdcubed.py
index 9feccc672..9feccc672 100644
--- a/youtube_dl/extractor/nerdcubed.py
+++ b/youtube_dlc/extractor/nerdcubed.py
diff --git a/youtube_dl/extractor/neteasemusic.py b/youtube_dlc/extractor/neteasemusic.py
index 978a05841..978a05841 100644
--- a/youtube_dl/extractor/neteasemusic.py
+++ b/youtube_dlc/extractor/neteasemusic.py
diff --git a/youtube_dl/extractor/netzkino.py b/youtube_dlc/extractor/netzkino.py
index aec3026b1..aec3026b1 100644
--- a/youtube_dl/extractor/netzkino.py
+++ b/youtube_dlc/extractor/netzkino.py
diff --git a/youtube_dl/extractor/newgrounds.py b/youtube_dlc/extractor/newgrounds.py
index 82e7cf522..82e7cf522 100644
--- a/youtube_dl/extractor/newgrounds.py
+++ b/youtube_dlc/extractor/newgrounds.py
diff --git a/youtube_dl/extractor/newstube.py b/youtube_dlc/extractor/newstube.py
index dab4aec44..dab4aec44 100644
--- a/youtube_dl/extractor/newstube.py
+++ b/youtube_dlc/extractor/newstube.py
diff --git a/youtube_dl/extractor/nextmedia.py b/youtube_dlc/extractor/nextmedia.py
index 7bd1290bf..7bd1290bf 100644
--- a/youtube_dl/extractor/nextmedia.py
+++ b/youtube_dlc/extractor/nextmedia.py
diff --git a/youtube_dlc/extractor/nexx.py b/youtube_dlc/extractor/nexx.py
new file mode 100644
index 000000000..586c1b7eb
--- /dev/null
+++ b/youtube_dlc/extractor/nexx.py
@@ -0,0 +1,453 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import random
+import re
+import time
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class NexxIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ https?://api\.nexx(?:\.cloud|cdn\.com)/v3/(?P<domain_id>\d+)/videos/byid/|
+ nexx:(?:(?P<domain_id_s>\d+):)?|
+ https?://arc\.nexx\.cloud/api/video/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # movie
+ 'url': 'https://api.nexx.cloud/v3/748/videos/byid/128907',
+ 'md5': '31899fd683de49ad46f4ee67e53e83fe',
+ 'info_dict': {
+ 'id': '128907',
+ 'ext': 'mp4',
+ 'title': 'Stiftung Warentest',
+ 'alt_title': 'Wie ein Test abläuft',
+ 'description': 'md5:d1ddb1ef63de721132abd38639cc2fd2',
+ 'creator': 'SPIEGEL TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2509,
+ 'timestamp': 1384264416,
+ 'upload_date': '20131112',
+ },
+ }, {
+ # episode
+ 'url': 'https://api.nexx.cloud/v3/741/videos/byid/247858',
+ 'info_dict': {
+ 'id': '247858',
+ 'ext': 'mp4',
+ 'title': 'Return of the Golden Child (OV)',
+ 'description': 'md5:5d969537509a92b733de21bae249dc63',
+ 'release_year': 2017,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1397,
+ 'timestamp': 1495033267,
+ 'upload_date': '20170517',
+ 'episode_number': 2,
+ 'season_number': 2,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ # does not work via arc
+ 'url': 'nexx:741:1269984',
+ 'md5': 'c714b5b238b2958dc8d5642addba6886',
+ 'info_dict': {
+ 'id': '1269984',
+ 'ext': 'mp4',
+ 'title': '1 TAG ohne KLO... wortwörtlich! 😑',
+ 'alt_title': '1 TAG ohne KLO... wortwörtlich! 😑',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 607,
+ 'timestamp': 1518614955,
+ 'upload_date': '20180214',
+ },
+ }, {
+ # free cdn from http://www.spiegel.de/video/eifel-zoo-aufregung-um-ausgebrochene-raubtiere-video-99018031.html
+ 'url': 'nexx:747:1533779',
+ 'md5': '6bf6883912b82b7069fb86c2297e9893',
+ 'info_dict': {
+ 'id': '1533779',
+ 'ext': 'mp4',
+ 'title': 'Aufregung um ausgebrochene Raubtiere',
+ 'alt_title': 'Eifel-Zoo',
+ 'description': 'md5:f21375c91c74ad741dcb164c427999d2',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 111,
+ 'timestamp': 1527874460,
+ 'upload_date': '20180601',
+ },
+ }, {
+ 'url': 'https://api.nexxcdn.com/v3/748/videos/byid/128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'nexx:748:128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'nexx:128907',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://arc.nexx.cloud/api/video/128907.json',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_domain_id(webpage):
+ mobj = re.search(
+ r'<script\b[^>]+\bsrc=["\'](?:https?:)?//(?:require|arc)\.nexx(?:\.cloud|cdn\.com)/(?:sdk/)?(?P<id>\d+)',
+ webpage)
+ return mobj.group('id') if mobj else None
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Reference:
+ # 1. https://nx-s.akamaized.net/files/201510/44.pdf
+
+ entries = []
+
+ # JavaScript Integration
+ domain_id = NexxIE._extract_domain_id(webpage)
+ if domain_id:
+ for video_id in re.findall(
+ r'(?is)onPLAYReady.+?_play\.(?:init|(?:control\.)?addPlayer)\s*\(.+?\s*,\s*["\']?(\d+)',
+ webpage):
+ entries.append(
+ 'https://api.nexx.cloud/v3/%s/videos/byid/%s'
+ % (domain_id, video_id))
+
+ # TODO: support more embed formats
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = NexxIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ def _handle_error(self, response):
+ status = int_or_none(try_get(
+ response, lambda x: x['metadata']['status']) or 200)
+ if 200 <= status < 300:
+ return
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, response['metadata']['errorhint']),
+ expected=True)
+
+ def _call_api(self, domain_id, path, video_id, data=None, headers={}):
+ headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
+ result = self._download_json(
+ 'https://api.nexx.cloud/v3/%s/%s' % (domain_id, path), video_id,
+ 'Downloading %s JSON' % path, data=urlencode_postdata(data),
+ headers=headers)
+ self._handle_error(result)
+ return result['result']
+
+ def _extract_free_formats(self, video, video_id):
+ stream_data = video['streamdata']
+ cdn = stream_data['cdnType']
+ assert cdn == 'free'
+
+        file_hash = video['general']['hash']
+
+ ps = compat_str(stream_data['originalDomain'])
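+        # when folder hierarchy applies, the path is sharded by the
+        # reversed zero-padded video ID, e.g. 1533779 -> '9773351' -> /97/73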
+ if stream_data['applyFolderHierarchy'] == 1:
+ s = ('%04d' % int(video_id))[::-1]
+ ps += '/%s/%s' % (s[0:2], s[2:4])
+        ps += '/%s/%s_' % (video_id, file_hash)
+
+ t = 'http://%s' + ps
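+        # azureFileDistribution is a comma-separated list of
+        # '<bitrate>:<width>x<height>' entries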
+ fd = stream_data['azureFileDistribution'].split(',')
+ cdn_provider = stream_data['cdnProvider']
+
+ def p0(p):
+ return '_%s' % p if stream_data['applyAzureStructure'] == 1 else ''
+
+ formats = []
+ if cdn_provider == 'ak':
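+            # Akamai: fold all renditions into a single .csmil master
+            # playlist URL (expanded below with cdnPathHLS)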
+ t += ','
+ for i in fd:
+ p = i.split(':')
+ t += p[1] + p0(int(p[0])) + ','
+ t += '.mp4.csmil/master.%s'
+ elif cdn_provider == 'ce':
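+            # CE: build one progressive HTTP format per rendition plus a
+            # manifest URL template used below for DASH and HLS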
+ k = t.split('/')
+ h = k.pop()
+ http_base = t = '/'.join(k)
+ http_base = http_base % stream_data['cdnPathHTTP']
+ t += '/asset.ism/manifest.%s?dcp_ver=aos4&videostream='
+ for i in fd:
+ p = i.split(':')
+ tbr = int(p[0])
+ filename = '%s%s%s.mp4' % (h, p[1], p0(tbr))
+ f = {
+ 'url': http_base + '/' + filename,
+ 'format_id': '%s-http-%d' % (cdn, tbr),
+ 'tbr': tbr,
+ }
+ width_height = p[1].split('x')
+ if len(width_height) == 2:
+ f.update({
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ })
+ formats.append(f)
+ a = filename + ':%s' % (tbr * 1000)
+ t += a + ','
+ t = t[:-1] + '&audiostream=' + a.split(':')[0]
+        else:
+            assert False, 'Unsupported cdnProvider: %s' % cdn_provider
+
+ if cdn_provider == 'ce':
+ formats.extend(self._extract_mpd_formats(
+ t % (stream_data['cdnPathDASH'], 'mpd'), video_id,
+ mpd_id='%s-dash' % cdn, fatal=False))
+ formats.extend(self._extract_m3u8_formats(
+ t % (stream_data['cdnPathHLS'], 'm3u8'), video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='%s-hls' % cdn, fatal=False))
+
+ return formats
+
+ def _extract_azure_formats(self, video, video_id):
+ stream_data = video['streamdata']
+ cdn = stream_data['cdnType']
+ assert cdn == 'azure'
+
+ azure_locator = stream_data['azureLocator']
+
+ def get_cdn_shield_base(shield_type='', static=False):
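+            # prefer a configured CDN shield host (HTTP, then HTTPS);
+            # otherwise fall back to an akamaized.net host derived from
+            # azureAccount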
+ for secure in ('', 's'):
+ cdn_shield = stream_data.get('cdnShield%sHTTP%s' % (shield_type, secure.upper()))
+ if cdn_shield:
+ return 'http%s://%s' % (secure, cdn_shield)
+ else:
+ if 'fb' in stream_data['azureAccount']:
+ prefix = 'df' if static else 'f'
+ else:
+ prefix = 'd' if static else 'p'
+ account = int(stream_data['azureAccount'].replace('nexxplayplus', '').replace('nexxplayfb', ''))
+ return 'http://nx-%s%02d.akamaized.net/' % (prefix, account)
+
+ language = video['general'].get('language_raw') or ''
+
+ azure_stream_base = get_cdn_shield_base()
+ is_ml = ',' in language
+ azure_manifest_url = '%s%s/%s_src%s.ism/Manifest' % (
+ azure_stream_base, azure_locator, video_id, ('_manifest' if is_ml else '')) + '%s'
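+        # the trailing '%s' is filled in below with a protocol-specific
+        # selector such as '(format=m3u8-aapl)' for HLS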
+
+ protection_token = try_get(
+ video, lambda x: x['protectiondata']['token'], compat_str)
+ if protection_token:
+ azure_manifest_url += '?hdnts=%s' % protection_token
+
+ formats = self._extract_m3u8_formats(
+ azure_manifest_url % '(format=m3u8-aapl)',
+ video_id, 'mp4', 'm3u8_native',
+ m3u8_id='%s-hls' % cdn, fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ azure_manifest_url % '(format=mpd-time-csf)',
+ video_id, mpd_id='%s-dash' % cdn, fatal=False))
+ formats.extend(self._extract_ism_formats(
+ azure_manifest_url % '', video_id, ism_id='%s-mss' % cdn, fatal=False))
+
+ azure_progressive_base = get_cdn_shield_base('Prog', True)
+ azure_file_distribution = stream_data.get('azureFileDistribution')
+ if azure_file_distribution:
+ fds = azure_file_distribution.split(',')
+ if fds:
+ for fd in fds:
+ ss = fd.split(':')
+ if len(ss) == 2:
+ tbr = int_or_none(ss[0])
+ if tbr:
+ f = {
+ 'url': '%s%s/%s_src_%s_%d.mp4' % (
+ azure_progressive_base, azure_locator, video_id, ss[1], tbr),
+ 'format_id': '%s-http-%d' % (cdn, tbr),
+ 'tbr': tbr,
+ }
+ width_height = ss[1].split('x')
+ if len(width_height) == 2:
+ f.update({
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ })
+ formats.append(f)
+
+ return formats
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ domain_id = mobj.group('domain_id') or mobj.group('domain_id_s')
+ video_id = mobj.group('id')
+
+ video = None
+
+ def find_video(result):
+ if isinstance(result, dict):
+ return result
+ elif isinstance(result, list):
+ vid = int(video_id)
+ for v in result:
+ if try_get(v, lambda x: x['general']['ID'], int) == vid:
+ return v
+ return None
+
+ response = self._download_json(
+ 'https://arc.nexx.cloud/api/video/%s.json' % video_id,
+ video_id, fatal=False)
+ if response and isinstance(response, dict):
+ result = response.get('result')
+ if result:
+ video = find_video(result)
+
+ # not all videos work via arc, e.g. nexx:741:1269984
+ if not video:
+ # Reverse engineered from JS code (see getDeviceID function)
+            device_id = '%d:%d:%d%d' % (
+                random.randint(1, 4), int(time.time()),
+                random.randint(10000, 99999), random.randint(1, 9))
+
+ result = self._call_api(domain_id, 'session/init', video_id, data={
+ 'nxp_devh': device_id,
+ 'nxp_userh': '',
+ 'precid': '0',
+ 'playlicense': '0',
+ 'screenx': '1920',
+ 'screeny': '1080',
+ 'playerversion': '6.0.00',
+ 'gateway': 'html5',
+ 'adGateway': '',
+ 'explicitlanguage': 'en-US',
+ 'addTextTemplates': '1',
+ 'addDomainData': '1',
+ 'addAdModel': '1',
+ }, headers={
+ 'X-Request-Enable-Auth-Fallback': '1',
+ })
+
+ cid = result['general']['cid']
+
+ # As described in [1] X-Request-Token generation algorithm is
+ # as follows:
+ # md5( operation + domain_id + domain_secret )
+ # where domain_secret is a static value that will be given by nexx.tv
+ # as per [1]. Here is how this "secret" is generated (reversed
+ # from _play.api.init function, search for clienttoken). So it's
+ # actually not static and not that much of a secret.
+ # 1. https://nexxtvstorage.blob.core.windows.net/files/201610/27.pdf
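+            # e.g. a device_id starting with '3' and ending in '7' strips
+            # 3 characters from the start and 7 from the end of clienttoken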
+ secret = result['device']['clienttoken'][int(device_id[0]):]
+ secret = secret[0:len(secret) - int(device_id[-1])]
+
+ op = 'byid'
+
+ # Reversed from JS code for _play.api.call function (search for
+ # X-Request-Token)
+ request_token = hashlib.md5(
+ ''.join((op, domain_id, secret)).encode('utf-8')).hexdigest()
+
+ result = self._call_api(
+ domain_id, 'videos/%s/%s' % (op, video_id), video_id, data={
+ 'additionalfields': 'language,channel,actors,studio,licenseby,slug,subtitle,teaser,description',
+ 'addInteractionOptions': '1',
+ 'addStatusDetails': '1',
+ 'addStreamDetails': '1',
+ 'addCaptions': '1',
+ 'addScenes': '1',
+ 'addHotSpots': '1',
+ 'addBumpers': '1',
+ 'captionFormat': 'data',
+ }, headers={
+ 'X-Request-CID': cid,
+ 'X-Request-Token': request_token,
+ })
+ video = find_video(result)
+
+ general = video['general']
+ title = general['title']
+
+ cdn = video['streamdata']['cdnType']
+
+ if cdn == 'azure':
+ formats = self._extract_azure_formats(video, video_id)
+ elif cdn == 'free':
+ formats = self._extract_free_formats(video, video_id)
+ else:
+ # TODO: reverse more cdns
+            assert False, 'Unsupported cdnType: %s' % cdn
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'alt_title': general.get('subtitle'),
+ 'description': general.get('description'),
+ 'release_year': int_or_none(general.get('year')),
+ 'creator': general.get('studio') or general.get('studio_adref'),
+ 'thumbnail': try_get(
+ video, lambda x: x['imagedata']['thumb'], compat_str),
+ 'duration': parse_duration(general.get('runtime')),
+ 'timestamp': int_or_none(general.get('uploaded')),
+ 'episode_number': int_or_none(try_get(
+ video, lambda x: x['episodedata']['episode'])),
+ 'season_number': int_or_none(try_get(
+ video, lambda x: x['episodedata']['season'])),
+ 'formats': formats,
+ }
+
+
+class NexxEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:video/)?(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'http://embed.nexx.cloud/748/KC1614647Z27Y7T?autoplay=1',
+ 'md5': '16746bfc28c42049492385c989b26c4a',
+ 'info_dict': {
+ 'id': '161464',
+ 'ext': 'mp4',
+ 'title': 'Nervenkitzel Achterbahn',
+ 'alt_title': 'Karussellbauer in Deutschland',
+ 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
+ 'creator': 'SPIEGEL TV',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 2761,
+ 'timestamp': 1394021479,
+ 'upload_date': '20140305',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://embed.nexx.cloud/11888/video/DSRTO7UVOX06S7',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Reference:
+ # 1. https://nx-s.akamaized.net/files/201510/44.pdf
+
+ # iFrame Embed Integration
+ return [mobj.group('url') for mobj in re.finditer(
+ r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//embed\.nexx(?:\.cloud|cdn\.com)/\d+/(?:(?!\1).)+)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ embed_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, embed_id)
+
+ return self.url_result(NexxIE._extract_url(webpage), ie=NexxIE.ie_key())
diff --git a/youtube_dl/extractor/nfl.py b/youtube_dlc/extractor/nfl.py
index 460deb162..460deb162 100644
--- a/youtube_dl/extractor/nfl.py
+++ b/youtube_dlc/extractor/nfl.py
diff --git a/youtube_dlc/extractor/nhk.py b/youtube_dlc/extractor/nhk.py
new file mode 100644
index 000000000..de6a707c4
--- /dev/null
+++ b/youtube_dlc/extractor/nhk.py
@@ -0,0 +1,93 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class NhkVodIE(InfoExtractor):
+ _VALID_URL = r'https?://www3\.nhk\.or\.jp/nhkworld/(?P<lang>[a-z]{2})/ondemand/(?P<type>video|audio)/(?P<id>\d{7}|[^/]+?-\d{8}-\d+)'
+ # Content available only for a limited period of time. Visit
+ # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples.
+ _TESTS = [{
+ # clip
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
+ 'md5': '256a1be14f48d960a7e61e2532d95ec3',
+ 'info_dict': {
+ 'id': 'a95j5iza',
+ 'ext': 'mp4',
+ 'title': "Dining with the Chef - Chef Saito's Family recipe: MENCHI-KATSU",
+ 'description': 'md5:5aee4a9f9d81c26281862382103b0ea5',
+ 'timestamp': 1565965194,
+ 'upload_date': '20190816',
+ },
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/fr/ondemand/audio/plugin-20190404-1/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/j_art-20150903-1/',
+ 'only_matching': True,
+ }]
+ _API_URL_TEMPLATE = 'https://api.nhk.or.jp/nhkworld/%sod%slist/v7a/episode/%s/%s/all%s.json'
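+    # expands to e.g.
+    # https://api.nhk.or.jp/nhkworld/vodesdlist/v7a/episode/2015-173/en/all/all.json
+    # for the English video episode 2015173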
+
+ def _real_extract(self, url):
+ lang, m_type, episode_id = re.match(self._VALID_URL, url).groups()
+ if episode_id.isdigit():
+ episode_id = episode_id[:4] + '-' + episode_id[4:]
+
+ is_video = m_type == 'video'
+ episode = self._download_json(
+ self._API_URL_TEMPLATE % (
+ 'v' if is_video else 'r',
+ 'clip' if episode_id[:4] == '9999' else 'esd',
+ episode_id, lang, '/all' if is_video else ''),
+ episode_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'][0]
+ title = episode.get('sub_title_clean') or episode['sub_title']
+
+ def get_clean_field(key):
+ return episode.get(key + '_clean') or episode.get(key)
+
+ series = get_clean_field('title')
+
+ thumbnails = []
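+        # the API exposes two thumbnail variants: 'image' (640x360) and
+        # 'image_l' (1280x720)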
+ for s, w, h in [('', 640, 360), ('_l', 1280, 720)]:
+ img_path = episode.get('image' + s)
+ if not img_path:
+ continue
+ thumbnails.append({
+ 'id': '%dp' % h,
+ 'height': h,
+ 'width': w,
+ 'url': 'https://www3.nhk.or.jp' + img_path,
+ })
+
+ info = {
+ 'id': episode_id + '-' + lang,
+ 'title': '%s - %s' % (series, title) if series and title else title,
+ 'description': get_clean_field('description'),
+ 'thumbnails': thumbnails,
+ 'series': series,
+ 'episode': title,
+ }
+ if is_video:
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': 'Piksel',
+ 'url': 'https://player.piksel.com/v/refid/nhkworld/prefid/' + episode['vod_id'],
+ })
+ else:
+ audio = episode['audio']
+ audio_path = audio['audio']
+ info['formats'] = self._extract_m3u8_formats(
+ 'https://nhkworld-vh.akamaihd.net/i%s/master.m3u8' % audio_path,
+ episode_id, 'm4a', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ for f in info['formats']:
+ f['language'] = lang
+ return info
diff --git a/youtube_dl/extractor/nhl.py b/youtube_dlc/extractor/nhl.py
index eddfe1f37..eddfe1f37 100644
--- a/youtube_dl/extractor/nhl.py
+++ b/youtube_dlc/extractor/nhl.py
diff --git a/youtube_dl/extractor/nick.py b/youtube_dlc/extractor/nick.py
index 2e8b302ac..2e8b302ac 100644
--- a/youtube_dl/extractor/nick.py
+++ b/youtube_dlc/extractor/nick.py
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dlc/extractor/niconico.py
index eb07ca776..eb07ca776 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dlc/extractor/niconico.py
diff --git a/youtube_dl/extractor/ninecninemedia.py b/youtube_dlc/extractor/ninecninemedia.py
index 65754c5e7..65754c5e7 100644
--- a/youtube_dl/extractor/ninecninemedia.py
+++ b/youtube_dlc/extractor/ninecninemedia.py
diff --git a/youtube_dl/extractor/ninegag.py b/youtube_dlc/extractor/ninegag.py
index dc6a27d36..dc6a27d36 100644
--- a/youtube_dl/extractor/ninegag.py
+++ b/youtube_dlc/extractor/ninegag.py
diff --git a/youtube_dl/extractor/ninenow.py b/youtube_dlc/extractor/ninenow.py
index 6157dc7c1..6157dc7c1 100644
--- a/youtube_dl/extractor/ninenow.py
+++ b/youtube_dlc/extractor/ninenow.py
diff --git a/youtube_dlc/extractor/nintendo.py b/youtube_dlc/extractor/nintendo.py
new file mode 100644
index 000000000..ff8f70ba6
--- /dev/null
+++ b/youtube_dlc/extractor/nintendo.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+
+
+class NintendoIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nintendo\.com/(?:games/detail|nintendo-direct)/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.nintendo.com/games/detail/duck-hunt-wii-u/',
+ 'info_dict': {
+ 'id': 'MzMmticjp0VPzO3CCj4rmFOuohEuEWoW',
+ 'ext': 'flv',
+ 'title': 'Duck Hunt Wii U VC NES - Trailer',
+ 'duration': 60.326,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }, {
+ 'url': 'http://www.nintendo.com/games/detail/tokyo-mirage-sessions-fe-wii-u',
+ 'info_dict': {
+ 'id': 'tokyo-mirage-sessions-fe-wii-u',
+ 'title': 'Tokyo Mirage Sessions ♯FE',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://www.nintendo.com/nintendo-direct/09-04-2019/',
+ 'info_dict': {
+ 'id': 'J2bXdmaTE6fe3dWJTPcc7m23FNbc_A1V',
+ 'ext': 'mp4',
+ 'title': 'Switch_ROS_ND0904-H264.mov',
+ 'duration': 2324.758,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Ooyala'],
+ }]
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, page_id)
+
+ entries = [
+ OoyalaIE._build_url_result(m.group('code'))
+ for m in re.finditer(
+ r'data-(?:video-id|directVideoId)=(["\'])(?P<code>(?:(?!\1).)+)\1', webpage)]
+
+ title = self._html_search_regex(
+ r'(?s)<(?:span|div)[^>]+class="(?:title|wrapper)"[^>]*>.*?<h1>(.+?)</h1>',
+ webpage, 'title', fatal=False)
+
+        return self.playlist_result(entries, page_id, title)
diff --git a/youtube_dl/extractor/njpwworld.py b/youtube_dlc/extractor/njpwworld.py
index 025c5d249..025c5d249 100644
--- a/youtube_dl/extractor/njpwworld.py
+++ b/youtube_dlc/extractor/njpwworld.py
diff --git a/youtube_dl/extractor/nobelprize.py b/youtube_dlc/extractor/nobelprize.py
index 4dfdb09d6..4dfdb09d6 100644
--- a/youtube_dl/extractor/nobelprize.py
+++ b/youtube_dlc/extractor/nobelprize.py
diff --git a/youtube_dl/extractor/noco.py b/youtube_dlc/extractor/noco.py
index 30df905af..30df905af 100644
--- a/youtube_dl/extractor/noco.py
+++ b/youtube_dlc/extractor/noco.py
diff --git a/youtube_dl/extractor/nonktube.py b/youtube_dlc/extractor/nonktube.py
index ca1424e06..ca1424e06 100644
--- a/youtube_dl/extractor/nonktube.py
+++ b/youtube_dlc/extractor/nonktube.py
diff --git a/youtube_dl/extractor/noovo.py b/youtube_dlc/extractor/noovo.py
index b40770d07..b40770d07 100644
--- a/youtube_dl/extractor/noovo.py
+++ b/youtube_dlc/extractor/noovo.py
diff --git a/youtube_dl/extractor/normalboots.py b/youtube_dlc/extractor/normalboots.py
index 61fe571df..61fe571df 100644
--- a/youtube_dl/extractor/normalboots.py
+++ b/youtube_dlc/extractor/normalboots.py
diff --git a/youtube_dl/extractor/nosvideo.py b/youtube_dlc/extractor/nosvideo.py
index 53c500c35..53c500c35 100644
--- a/youtube_dl/extractor/nosvideo.py
+++ b/youtube_dlc/extractor/nosvideo.py
diff --git a/youtube_dlc/extractor/nova.py b/youtube_dlc/extractor/nova.py
new file mode 100644
index 000000000..47b9748f0
--- /dev/null
+++ b/youtube_dlc/extractor/nova.py
@@ -0,0 +1,305 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ js_to_json,
+ qualities,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class NovaEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://media\.cms\.nova\.cz/embed/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://media.cms.nova.cz/embed/8o0n0r?autoplay=1',
+ 'md5': 'ee009bafcc794541570edd44b71cbea3',
+ 'info_dict': {
+ 'id': '8o0n0r',
+ 'ext': 'mp4',
+ 'title': '2180. díl',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2578,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ duration = None
+ formats = []
+
+ player = self._parse_json(
+ self._search_regex(
+ r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
+ webpage, 'player', default='{}'), video_id, fatal=False)
+ if player:
+ for format_id, format_list in player['tracks'].items():
+ if not isinstance(format_list, list):
+ format_list = [format_list]
+ for format_dict in format_list:
+ if not isinstance(format_dict, dict):
+ continue
+ format_url = url_or_none(format_dict.get('src'))
+ format_type = format_dict.get('type')
+ ext = determine_ext(format_url)
+ if (format_type == 'application/x-mpegURL'
+ or format_id == 'HLS' or ext == 'm3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ elif (format_type == 'application/dash+xml'
+ or format_id == 'DASH' or ext == 'mpd'):
+ formats.extend(self._extract_mpd_formats(
+ format_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': format_url,
+ })
+ duration = int_or_none(player.get('duration'))
+ else:
+            # legacy code path, no longer used as of 08.04.2020
+ bitrates = self._parse_json(
+ self._search_regex(
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
+ video_id, transform_source=js_to_json)
+
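+            # listed from worst to best; qualities() ranks later entries
+            # higher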
+ QUALITIES = ('lq', 'mq', 'hq', 'hd')
+ quality_key = qualities(QUALITIES)
+
+ for format_id, format_list in bitrates.items():
+ if not isinstance(format_list, list):
+ format_list = [format_list]
+ for format_url in format_list:
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, ext='mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ continue
+ f = {
+ 'url': format_url,
+ }
+ f_id = format_id
+ for quality in QUALITIES:
+ if '%s.mp4' % quality in format_url:
+ f_id += '-%s' % quality
+ f.update({
+ 'quality': quality_key(quality),
+ 'format_note': quality.upper(),
+ })
+ break
+ f['format_id'] = f_id
+ formats.append(f)
+
+ self._sort_formats(formats)
+
+ title = self._og_search_title(
+ webpage, default=None) or self._search_regex(
+            (r'<value>(?P<value>[^<]+)',
+ r'videoTitle\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'title', group='value')
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._search_regex(
+ r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'thumbnail', fatal=False, group='value')
+ duration = int_or_none(self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
+ default=duration))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class NovaIE(InfoExtractor):
+ IE_DESC = 'TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz'
+ _VALID_URL = r'https?://(?:[^.]+\.)?(?P<site>tv(?:noviny)?|tn|novaplus|vymena|fanda|krasna|doma|prask)\.nova\.cz/(?:[^/]+/)+(?P<id>[^/]+?)(?:\.html|/|$)'
+ _TESTS = [{
+ 'url': 'http://tn.nova.cz/clanek/tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci.html#player_13260',
+ 'md5': '249baab7d0104e186e78b0899c7d5f28',
+ 'info_dict': {
+ 'id': '1757139',
+ 'display_id': 'tajemstvi-ukryte-v-podzemi-specialni-nemocnice-v-prazske-krci',
+ 'ext': 'mp4',
+ 'title': 'Podzemní nemocnice v pražské Krči',
+ 'description': 'md5:f0a42dd239c26f61c28f19e62d20ef53',
+ 'thumbnail': r're:^https?://.*\.(?:jpg)',
+ }
+ }, {
+ 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
+ 'info_dict': {
+ 'id': '1753621',
+ 'ext': 'mp4',
+ 'title': 'Zaklínač 3: Divoký hon',
+ 'description': 're:.*Pokud se stejně jako my nemůžete.*',
+ 'thumbnail': r're:https?://.*\.jpg(\?.*)?',
+ 'upload_date': '20150521',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'gone',
+ }, {
+ # media.cms.nova.cz embed
+ 'url': 'https://novaplus.nova.cz/porad/ulice/epizoda/18760-2180-dil',
+ 'info_dict': {
+ 'id': '8o0n0r',
+ 'ext': 'mp4',
+ 'title': '2180. díl',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 2578,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [NovaEmbedIE.ie_key()],
+ 'skip': 'CHYBA 404: STRÁNKA NENALEZENA',
+ }, {
+ 'url': 'http://sport.tn.nova.cz/clanek/sport/hokej/nhl/zivot-jde-dal-hodnotil-po-vyrazeni-z-playoff-jiri-sekac.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fanda.nova.cz/clanek/fun-and-games/krvavy-epos-zaklinac-3-divoky-hon-vychazi-vyhrajte-ho-pro-sebe.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://doma.nova.cz/clanek/zdravi/prijdte-se-zapsat-do-registru-kostni-drene-jiz-ve-stredu-3-cervna.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://prask.nova.cz/clanek/novinky/co-si-na-sobe-nase-hvezdy-nechaly-pojistit.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tv.nova.cz/clanek/novinky/zivot-je-zivot-bondovsky-trailer.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('id')
+ site = mobj.group('site')
+
+ webpage = self._download_webpage(url, display_id)
+
+ description = clean_html(self._og_search_description(webpage, default=None))
+ if site == 'novaplus':
+ upload_date = unified_strdate(self._search_regex(
+ r'(\d{1,2}-\d{1,2}-\d{4})$', display_id, 'upload date', default=None))
+ elif site == 'fanda':
+ upload_date = unified_strdate(self._search_regex(
+ r'<span class="date_time">(\d{1,2}\.\d{1,2}\.\d{4})', webpage, 'upload date', default=None))
+ else:
+ upload_date = None
+
+ # novaplus
+ embed_id = self._search_regex(
+ r'<iframe[^>]+\bsrc=["\'](?:https?:)?//media\.cms\.nova\.cz/embed/([^/?#&]+)',
+ webpage, 'embed url', default=None)
+ if embed_id:
+ return {
+ '_type': 'url_transparent',
+ 'url': 'https://media.cms.nova.cz/embed/%s' % embed_id,
+ 'ie_key': NovaEmbedIE.ie_key(),
+ 'id': embed_id,
+ 'description': description,
+ 'upload_date': upload_date
+ }
+
+ video_id = self._search_regex(
+ [r"(?:media|video_id)\s*:\s*'(\d+)'",
+ r'media=(\d+)',
+ r'id="article_video_(\d+)"',
+ r'id="player_(\d+)"'],
+ webpage, 'video id')
+
+ config_url = self._search_regex(
+ r'src="(https?://(?:tn|api)\.nova\.cz/bin/player/videojs/config\.php\?[^"]+)"',
+ webpage, 'config url', default=None)
+ config_params = {}
+
+ if not config_url:
+ player = self._parse_json(
+ self._search_regex(
+ r'(?s)Player\s*\(.+?\s*,\s*({.+?\bmedia\b["\']?\s*:\s*["\']?\d+.+?})\s*\)', webpage,
+ 'player', default='{}'),
+ video_id, transform_source=js_to_json, fatal=False)
+ if player:
+ config_url = url_or_none(player.get('configUrl'))
+ params = player.get('configParams')
+ if isinstance(params, dict):
+ config_params = params
+
+ if not config_url:
+ DEFAULT_SITE_ID = '23000'
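+            # numeric site IDs used by the config endpoint; unknown sites
+            # fall back to DEFAULT_SITE_ID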
+ SITES = {
+ 'tvnoviny': DEFAULT_SITE_ID,
+ 'novaplus': DEFAULT_SITE_ID,
+ 'vymena': DEFAULT_SITE_ID,
+ 'krasna': DEFAULT_SITE_ID,
+ 'fanda': '30',
+ 'tn': '30',
+ 'doma': '30',
+ }
+
+ site_id = self._search_regex(
+ r'site=(\d+)', webpage, 'site id', default=None) or SITES.get(
+ site, DEFAULT_SITE_ID)
+
+ config_url = 'https://api.nova.cz/bin/player/videojs/config.php'
+ config_params = {
+ 'site': site_id,
+ 'media': video_id,
+ 'quality': 3,
+ 'version': 1,
+ }
+
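+        # the config endpoint may wrap the JSON in a callback, so keep
+        # only the outermost {...} before parsing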
+ config = self._download_json(
+ config_url, display_id,
+ 'Downloading config JSON', query=config_params,
+ transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])
+
+ mediafile = config['mediafile']
+ video_url = mediafile['src']
+
+ m = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>[^/]+?))/&*(?P<playpath>.+)$', video_url)
+ if m:
+ formats = [{
+ 'url': m.group('url'),
+ 'app': m.group('app'),
+ 'play_path': m.group('playpath'),
+ 'player_path': 'http://tvnoviny.nova.cz/static/shared/app/videojs/video-js.swf',
+ 'ext': 'flv',
+ }]
+ else:
+ formats = [{
+ 'url': video_url,
+ }]
+ self._sort_formats(formats)
+
+ title = mediafile.get('meta', {}).get('title') or self._og_search_title(webpage)
+ thumbnail = config.get('poster')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dlc/extractor/nowness.py b/youtube_dlc/extractor/nowness.py
new file mode 100644
index 000000000..c136bc8c0
--- /dev/null
+++ b/youtube_dlc/extractor/nowness.py
@@ -0,0 +1,147 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .brightcove import (
+ BrightcoveLegacyIE,
+ BrightcoveNewIE,
+)
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ sanitized_Request,
+)
+
+
+class NownessBaseIE(InfoExtractor):
+ def _extract_url_result(self, post):
+ if post['type'] == 'video':
+ for media in post['media']:
+ if media['type'] == 'video':
+ video_id = media['content']
+ source = media['source']
+ if source == 'brightcove':
+ player_code = self._download_webpage(
+ 'http://www.nowness.com/iframe?id=%s' % video_id, video_id,
+ note='Downloading player JavaScript',
+ errnote='Unable to download player JavaScript')
+ bc_url = BrightcoveLegacyIE._extract_brightcove_url(player_code)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveLegacyIE.ie_key())
+ bc_url = BrightcoveNewIE._extract_url(self, player_code)
+ if bc_url:
+ return self.url_result(bc_url, BrightcoveNewIE.ie_key())
+ raise ExtractorError('Could not find player definition')
+ elif source == 'vimeo':
+ return self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
+ elif source == 'youtube':
+ return self.url_result(video_id, 'Youtube')
+ elif source == 'cinematique':
+ # youtube-dlc currently doesn't support cinematique
+ # return self.url_result('http://cinematique.com/embed/%s' % video_id, 'Cinematique')
+ pass
+
+ def _api_request(self, url, request_path):
+ display_id = self._match_id(url)
+ request = sanitized_Request(
+ 'http://api.nowness.com/api/' + request_path % display_id,
+ headers={
+ 'X-Nowness-Language': 'zh-cn' if 'cn.nowness.com' in url else 'en-us',
+ })
+ return display_id, self._download_json(request, display_id)
+
+
+class NownessIE(NownessBaseIE):
+ IE_NAME = 'nowness'
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/(?:story|(?:series|category)/[^/]+)/(?P<id>[^/]+?)(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://www.nowness.com/story/candor-the-art-of-gesticulation',
+ 'md5': '068bc0202558c2e391924cb8cc470676',
+ 'info_dict': {
+ 'id': '2520295746001',
+ 'ext': 'mp4',
+ 'title': 'Candor: The Art of Gesticulation',
+ 'description': 'Candor: The Art of Gesticulation',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1446745676,
+ 'upload_date': '20151105',
+ 'uploader_id': '2385340575001',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ 'url': 'https://cn.nowness.com/story/kasper-bjorke-ft-jaakko-eino-kalevi-tnr',
+ 'md5': 'e79cf125e387216f86b2e0a5b5c63aa3',
+ 'info_dict': {
+ 'id': '3716354522001',
+ 'ext': 'mp4',
+ 'title': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+ 'description': 'Kasper Bjørke ft. Jaakko Eino Kalevi: TNR',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1407315371,
+ 'upload_date': '20140806',
+ 'uploader_id': '2385340575001',
+ },
+ 'add_ie': ['BrightcoveNew'],
+ }, {
+ # vimeo
+ 'url': 'https://www.nowness.com/series/nowness-picks/jean-luc-godard-supercut',
+ 'md5': '9a5a6a8edf806407e411296ab6bc2a49',
+ 'info_dict': {
+ 'id': '130020913',
+ 'ext': 'mp4',
+ 'title': 'Bleu, Blanc, Rouge - A Godard Supercut',
+ 'description': 'md5:f0ea5f1857dffca02dbd37875d742cec',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'upload_date': '20150607',
+ 'uploader': 'Cinema Sem Lei',
+ 'uploader_id': 'cinemasemlei',
+ },
+ 'add_ie': ['Vimeo'],
+ }]
+
+ def _real_extract(self, url):
+ _, post = self._api_request(url, 'post/getBySlug/%s')
+ return self._extract_url_result(post)
+
+
+class NownessPlaylistIE(NownessBaseIE):
+ IE_NAME = 'nowness:playlist'
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/playlist/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.nowness.com/playlist/3286/i-guess-thats-why-they-call-it-the-blues',
+ 'info_dict': {
+ 'id': '3286',
+ },
+ 'playlist_mincount': 8,
+ }
+
+ def _real_extract(self, url):
+ playlist_id, playlist = self._api_request(url, 'post?PlaylistId=%s')
+ entries = [self._extract_url_result(item) for item in playlist['items']]
+ return self.playlist_result(entries, playlist_id)
+
+
+class NownessSeriesIE(NownessBaseIE):
+ IE_NAME = 'nowness:series'
+ _VALID_URL = r'https?://(?:(?:www|cn)\.)?nowness\.com/series/(?P<id>[^/]+?)(?:$|[?#])'
+ _TEST = {
+ 'url': 'https://www.nowness.com/series/60-seconds',
+ 'info_dict': {
+ 'id': '60',
+ 'title': '60 Seconds',
+ 'description': 'One-minute wisdom in a new NOWNESS series',
+ },
+ 'playlist_mincount': 4,
+ }
+
+ def _real_extract(self, url):
+ display_id, series = self._api_request(url, 'series/getBySlug/%s')
+ entries = [self._extract_url_result(post) for post in series['posts']]
+ series_title = None
+ series_description = None
+ translations = series.get('translations', [])
+ if translations:
+ series_title = translations[0].get('title') or translations[0]['seoTitle']
+ series_description = translations[0].get('seoDescription')
+ return self.playlist_result(
+ entries, compat_str(series['id']), series_title, series_description)
diff --git a/youtube_dl/extractor/noz.py b/youtube_dlc/extractor/noz.py
index ccafd7723..ccafd7723 100644
--- a/youtube_dl/extractor/noz.py
+++ b/youtube_dlc/extractor/noz.py
diff --git a/youtube_dl/extractor/npo.py b/youtube_dlc/extractor/npo.py
index e525ad928..e525ad928 100644
--- a/youtube_dl/extractor/npo.py
+++ b/youtube_dlc/extractor/npo.py
diff --git a/youtube_dlc/extractor/npr.py b/youtube_dlc/extractor/npr.py
new file mode 100644
index 000000000..53acc6e57
--- /dev/null
+++ b/youtube_dlc/extractor/npr.py
@@ -0,0 +1,124 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ qualities,
+ url_or_none,
+)
+
+
+class NprIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?npr\.org/(?:sections/[^/]+/)?\d{4}/\d{2}/\d{2}/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.npr.org/sections/allsongs/2015/10/21/449974205/new-music-from-beach-house-chairlift-cmj-discoveries-and-more',
+ 'info_dict': {
+ 'id': '449974205',
+ 'title': 'New Music From Beach House, Chairlift, CMJ Discoveries And More'
+ },
+ 'playlist_count': 7,
+ }, {
+ 'url': 'https://www.npr.org/sections/deceptivecadence/2015/10/09/446928052/music-from-the-shadows-ancient-armenian-hymns-and-piano-jazz',
+ 'info_dict': {
+ 'id': '446928052',
+ 'title': "Songs We Love: Tigran Hamasyan, 'Your Mercy is Boundless'"
+ },
+ 'playlist': [{
+ 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
+ 'info_dict': {
+ 'id': '446929930',
+ 'ext': 'mp3',
+ 'title': 'Your Mercy is Boundless (Bazum en Qo gtutyunqd)',
+ 'duration': 402,
+ },
+ }],
+ }, {
+        # multimedia entry with no media title of its own
+ 'url': 'https://www.npr.org/2017/06/19/533198237/tigers-jaw-tiny-desk-concert',
+ 'info_dict': {
+ 'id': '533198237',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ },
+ 'playlist': [{
+ 'md5': '12fa60cb2d3ed932f53609d4aeceabf1',
+ 'info_dict': {
+ 'id': '533201718',
+ 'ext': 'mp4',
+ 'title': 'Tigers Jaw: Tiny Desk Concert',
+ 'duration': 402,
+ },
+ }],
+ 'expected_warnings': ['Failed to download m3u8 information'],
+ }, {
+ # multimedia, no formats, stream
+ 'url': 'https://www.npr.org/2020/02/14/805476846/laura-stevenson-tiny-desk-concert',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ story = self._download_json(
+ 'http://api.npr.org/query', playlist_id, query={
+ 'id': playlist_id,
+ 'fields': 'audio,multimedia,title',
+ 'format': 'json',
+ 'apiKey': 'MDAzMzQ2MjAyMDEyMzk4MTU1MDg3ZmM3MQ010',
+ })['list']['story'][0]
+ playlist_title = story.get('title', {}).get('$text')
+
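+        # listed from least to most preferred; qualities() ranks later
+        # entries higher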
+ KNOWN_FORMATS = ('threegp', 'm3u8', 'smil', 'mp4', 'mp3')
+ quality = qualities(KNOWN_FORMATS)
+
+ entries = []
+ for media in story.get('audio', []) + story.get('multimedia', []):
+ media_id = media['id']
+
+ formats = []
+ for format_id, formats_entry in media.get('format', {}).items():
+ if not formats_entry:
+ continue
+ if isinstance(formats_entry, list):
+ formats_entry = formats_entry[0]
+ format_url = formats_entry.get('$text')
+ if not format_url:
+ continue
+ if format_id in KNOWN_FORMATS:
+ if format_id == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id == 'smil':
+ smil_formats = self._extract_smil_formats(
+ format_url, media_id, transform_source=lambda s: s.replace(
+ 'rtmp://flash.npr.org/ondemand/', 'https://ondemand.npr.org/'))
+ self._check_formats(smil_formats, media_id)
+ formats.extend(smil_formats)
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ })
+ for stream_id, stream_entry in media.get('stream', {}).items():
+ if not isinstance(stream_entry, dict):
+ continue
+ if stream_id != 'hlsUrl':
+ continue
+ stream_url = url_or_none(stream_entry.get('$text'))
+ if not stream_url:
+ continue
+ formats.extend(self._extract_m3u8_formats(
+ stream_url, stream_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ entries.append({
+ 'id': media_id,
+ 'title': media.get('title', {}).get('$text') or playlist_title,
+ 'thumbnail': media.get('altImageUrl', {}).get('$text'),
+ 'duration': int_or_none(media.get('duration', {}).get('$text')),
+ 'formats': formats,
+ })
+
+ return self.playlist_result(entries, playlist_id, playlist_title)
diff --git a/youtube_dlc/extractor/nrk.py b/youtube_dlc/extractor/nrk.py
new file mode 100644
index 000000000..94115534b
--- /dev/null
+++ b/youtube_dlc/extractor/nrk.py
@@ -0,0 +1,717 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_unquote,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ JSON_LD_RE,
+ js_to_json,
+ NO_DEFAULT,
+ parse_age_limit,
+ parse_duration,
+ try_get,
+)
+
+
+class NRKBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['NO']
+
+ _api_host = None
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
+
+ for api_host in api_hosts:
+ data = self._download_json(
+ 'http://%s/mediaelement/%s' % (api_host, video_id),
+ video_id, 'Downloading mediaelement JSON',
+ fatal=api_host == api_hosts[-1])
+ if not data:
+ continue
+ self._api_host = api_host
+ break
+
+ title = data.get('fullTitle') or data.get('mainTitle') or data['title']
+ video_id = data.get('id') or video_id
+
+ entries = []
+
+ conviva = data.get('convivaStatistics') or {}
+ live = (data.get('mediaElementType') == 'Live'
+ or data.get('isLive') is True or conviva.get('isLive'))
+
+ def make_title(t):
+ return self._live_title(t) if live else t
+
+ media_assets = data.get('mediaAssets')
+ if media_assets and isinstance(media_assets, list):
+ def video_id_and_title(idx):
+ return ((video_id, title) if len(media_assets) == 1
+ else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
+ for num, asset in enumerate(media_assets, 1):
+ asset_url = asset.get('url')
+ if not asset_url:
+ continue
+ formats = self._extract_akamai_formats(asset_url, video_id)
+ if not formats:
+ continue
+ self._sort_formats(formats)
+
+ # Some f4m streams may not work with hdcore in fragments' URLs
+ for f in formats:
+ extra_param = f.get('extra_param_to_segment_url')
+ if extra_param and 'hdcore' in extra_param:
+ del f['extra_param_to_segment_url']
+
+ entry_id, entry_title = video_id_and_title(num)
+ duration = parse_duration(asset.get('duration'))
+ subtitles = {}
+ for subtitle in ('webVtt', 'timedText'):
+ subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
+ if subtitle_url:
+ subtitles.setdefault('no', []).append({
+ 'url': compat_urllib_parse_unquote(subtitle_url)
+ })
+ entries.append({
+ 'id': asset.get('carrierId') or entry_id,
+ 'title': make_title(entry_title),
+ 'duration': duration,
+ 'subtitles': subtitles,
+ 'formats': formats,
+ })
+
+ if not entries:
+ media_url = data.get('mediaUrl')
+ if media_url:
+ formats = self._extract_akamai_formats(media_url, video_id)
+ self._sort_formats(formats)
+ duration = parse_duration(data.get('duration'))
+ entries = [{
+ 'id': video_id,
+ 'title': make_title(title),
+ 'duration': duration,
+ 'formats': formats,
+ }]
+
+ if not entries:
+ MESSAGES = {
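+                # Norwegian user-facing messages: cannot watch/listen,
+                # program expired, not available, geo-blocked outside Norway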
+ 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
+ 'ProgramRightsHasExpired': 'Programmet har gått ut',
+ 'NoProgramRights': 'Ikke tilgjengelig',
+ 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
+ }
+ message_type = data.get('messageType', '')
+ # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
+ if 'IsGeoBlocked' in message_type:
+ self.raise_geo_restricted(
+ msg=MESSAGES.get('ProgramIsGeoBlocked'),
+ countries=self._GEO_COUNTRIES)
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, MESSAGES.get(
+ message_type, message_type)),
+ expected=True)
+
+ series = conviva.get('seriesName') or data.get('seriesTitle')
+ episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
+
+ season_number = None
+ episode_number = None
+ if data.get('mediaElementType') == 'Episode':
+            _season_episode = (data.get('scoresStatistics', {}).get('springStreamStream')
+                               or data.get('relativeOriginUrl', ''))
+ EPISODENUM_RE = [
+ r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
+ r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
+ ]
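+            # e.g. '/serie/foo/sesong-2/episode-7' matches season 2,
+            # episode 7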
+ season_number = int_or_none(self._search_regex(
+ EPISODENUM_RE, _season_episode, 'season number',
+ default=None, group='season'))
+ episode_number = int_or_none(self._search_regex(
+ EPISODENUM_RE, _season_episode, 'episode number',
+ default=None, group='episode'))
+
+ thumbnails = None
+ images = data.get('images')
+ if images and isinstance(images, dict):
+ web_images = images.get('webImages')
+ if isinstance(web_images, list):
+ thumbnails = [{
+ 'url': image['imageUrl'],
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ } for image in web_images if image.get('imageUrl')]
+
+ description = data.get('description')
+ category = data.get('mediaAnalytics', {}).get('category')
+
+ common_info = {
+ 'description': description,
+ 'series': series,
+ 'episode': episode,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'categories': [category] if category else None,
+ 'age_limit': parse_age_limit(data.get('legalAge')),
+ 'thumbnails': thumbnails,
+ }
+
+ vcodec = 'none' if data.get('mediaType') == 'Audio' else None
+
+ for entry in entries:
+ entry.update(common_info)
+ for f in entry['formats']:
+ f['vcodec'] = vcodec
+
+ points = data.get('shortIndexPoints')
+ if isinstance(points, list):
+ chapters = []
+ for next_num, point in enumerate(points, start=1):
+ if not isinstance(point, dict):
+ continue
+ start_time = parse_duration(point.get('startPoint'))
+ if start_time is None:
+ continue
+ end_time = parse_duration(
+ data.get('duration')
+ if next_num == len(points)
+ else points[next_num].get('startPoint'))
+ if end_time is None:
+ continue
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': point.get('title'),
+ })
+ if chapters and len(entries) == 1:
+ entries[0]['chapters'] = chapters
+
+ return self.playlist_result(entries, video_id, title, description)
+
+
+class NRKIE(NRKBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ nrk:|
+ https?://
+ (?:
+ (?:www\.)?nrk\.no/video/PS\*|
+ v8[-.]psapi\.nrk\.no/mediaelement/
+ )
+ )
+ (?P<id>[^?#&]+)
+ '''
+ _API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
+ _TESTS = [{
+ # video
+ 'url': 'http://www.nrk.no/video/PS*150533',
+ 'md5': '706f34cdf1322577589e369e522b50ef',
+ 'info_dict': {
+ 'id': '150533',
+ 'ext': 'mp4',
+ 'title': 'Dompap og andre fugler i Piip-Show',
+ 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f',
+ 'duration': 262,
+ }
+ }, {
+ # audio
+ 'url': 'http://www.nrk.no/video/PS*154915',
+ # MD5 is unstable
+ 'info_dict': {
+ 'id': '154915',
+ 'ext': 'flv',
+ 'title': 'Slik høres internett ut når du er blind',
+ 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
+ 'duration': 20,
+ }
+ }, {
+ 'url': 'nrk:ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+ 'only_matching': True,
+ }, {
+ 'url': 'nrk:clip/7707d5a3-ebe7-434a-87d5-a3ebe7a34a70',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
+ 'only_matching': True,
+ }]
+
+
+class NRKTVIE(NRKBaseIE):
+ IE_DESC = 'NRK TV and NRK Radio'
+ _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:tv|radio)\.nrk(?:super)?\.no/
+ (?:serie(?:/[^/]+){1,2}|program)/
+ (?![Ee]pisodes)%s
+ (?:/\d{2}-\d{2}-\d{4})?
+ (?:\#del=(?P<part_id>\d+))?
+ ''' % _EPISODE_RE
+ _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no')
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/program/MDDP12000117',
+ 'md5': '8270824df46ec629b66aeaa5796b36fb',
+ 'info_dict': {
+ 'id': 'MDDP12000117AA',
+ 'ext': 'mp4',
+ 'title': 'Alarm Trolltunga',
+ 'description': 'md5:46923a6e6510eefcce23d5ef2a58f2ce',
+ 'duration': 2223,
+ 'age_limit': 6,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
+ 'md5': '9a167e54d04671eb6317a37b7bc8a280',
+ 'info_dict': {
+ 'id': 'MUHH48000314AA',
+ 'ext': 'mp4',
+ 'title': '20 spørsmål 23.05.2014',
+ 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
+ 'duration': 1741,
+ 'series': '20 spørsmål',
+ 'episode': '23.05.2014',
+ },
+ 'skip': 'NoProgramRights',
+ }, {
+ 'url': 'https://tv.nrk.no/program/mdfp15000514',
+ 'info_dict': {
+ 'id': 'MDFP15000514CA',
+ 'ext': 'mp4',
+ 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting 24.05.2014',
+ 'description': 'md5:89290c5ccde1b3a24bb8050ab67fe1db',
+ 'duration': 4605,
+ 'series': 'Kunnskapskanalen',
+ 'episode': '24.05.2014',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # single playlist video
+ 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
+ 'info_dict': {
+ 'id': 'MSPO40010515-part2',
+ 'ext': 'flv',
+ 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
+ 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Video is geo restricted'],
+ 'skip': 'particular part is not supported currently',
+ }, {
+ 'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'MSPO40010515AH',
+ 'ext': 'mp4',
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 1)',
+ 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
+ 'duration': 772,
+ 'series': 'Tour de Ski',
+ 'episode': '06.01.2015',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'MSPO40010515BH',
+ 'ext': 'mp4',
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015 (Part 2)',
+ 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
+ 'duration': 6175,
+ 'series': 'Tour de Ski',
+ 'episode': '06.01.2015',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }],
+ 'info_dict': {
+ 'id': 'MSPO40010515',
+ 'title': 'Sprint fri teknikk, kvinner og menn 06.01.2015',
+ 'description': 'md5:1f97a41f05a9486ee00c56f35f82993d',
+ },
+ 'expected_warnings': ['Video is geo restricted'],
+ }, {
+ 'url': 'https://tv.nrk.no/serie/anno/KMTE50001317/sesong-3/episode-13',
+ 'info_dict': {
+ 'id': 'KMTE50001317AA',
+ 'ext': 'mp4',
+ 'title': 'Anno 13:30',
+ 'description': 'md5:11d9613661a8dbe6f9bef54e3a4cbbfa',
+ 'duration': 2340,
+ 'series': 'Anno',
+ 'episode': '13:30',
+ 'season_number': 3,
+ 'episode_number': 13,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/nytt-paa-nytt/MUHH46000317/27-01-2017',
+ 'info_dict': {
+ 'id': 'MUHH46000317AA',
+ 'ext': 'mp4',
+ 'title': 'Nytt på Nytt 27.01.2017',
+ 'description': 'md5:5358d6388fba0ea6f0b6d11c48b9eb4b',
+ 'duration': 1796,
+ 'series': 'Nytt på nytt',
+ 'episode': '27.01.2017',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller',
+ 'only_matching': True,
+ }]
+
+
+class NRKTVEpisodeIE(InfoExtractor):
+ _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/serie/hellums-kro/sesong/1/episode/2',
+ 'info_dict': {
+ 'id': 'MUHH36005220BA',
+ 'ext': 'mp4',
+ 'title': 'Kro, krig og kjærlighet 2:6',
+ 'description': 'md5:b32a7dc0b1ed27c8064f58b97bda4350',
+ 'duration': 1563,
+ 'series': 'Hellums kro',
+ 'season_number': 1,
+ 'episode_number': 2,
+ 'episode': '2:6',
+ 'age_limit': 6,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://tv.nrk.no/serie/backstage/sesong/1/episode/8',
+ 'info_dict': {
+ 'id': 'MSUI14000816AA',
+ 'ext': 'mp4',
+ 'title': 'Backstage 8:30',
+ 'description': 'md5:de6ca5d5a2d56849e4021f2bf2850df4',
+ 'duration': 1320,
+ 'series': 'Backstage',
+ 'season_number': 1,
+ 'episode_number': 8,
+ 'episode': '8:30',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'ProgramRightsHasExpired',
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ nrk_id = self._parse_json(
+ self._search_regex(JSON_LD_RE, webpage, 'JSON-LD', group='json_ld'),
+ display_id)['@id']
+
+ assert re.match(NRKTVIE._EPISODE_RE, nrk_id)
+ return self.url_result(
+ 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)
+
+
+class NRKTVSerieBaseIE(InfoExtractor):
+ def _extract_series(self, webpage, display_id, fatal=True):
+ config = self._parse_json(
+ self._search_regex(
+ (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;',
+ r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'),
+ webpage, 'config', default='{}' if not fatal else NO_DEFAULT),
+ display_id, fatal=False, transform_source=js_to_json)
+ if not config:
+ return
+ return try_get(
+ config,
+ (lambda x: x['initialState']['series'], lambda x: x['series']),
+ dict)
+
+ def _extract_seasons(self, seasons):
+ if not isinstance(seasons, list):
+ return []
+ entries = []
+ for season in seasons:
+ entries.extend(self._extract_episodes(season))
+ return entries
+
+ def _extract_episodes(self, season):
+ if not isinstance(season, dict):
+ return []
+ return self._extract_entries(season.get('episodes'))
+
+ def _extract_entries(self, entry_list):
+ if not isinstance(entry_list, list):
+ return []
+ entries = []
+ for episode in entry_list:
+ nrk_id = episode.get('prfId')
+ if not nrk_id or not isinstance(nrk_id, compat_str):
+ continue
+ entries.append(self.url_result(
+ 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id))
+ return entries
+
+
+class NRKTVSeasonIE(NRKTVSerieBaseIE):
+ _VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://tv.nrk.no/serie/backstage/sesong/1',
+ 'info_dict': {
+ 'id': '1',
+ 'title': 'Sesong 1',
+ },
+ 'playlist_mincount': 30,
+ }
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if NRKTVIE.suitable(url) or NRKTVEpisodeIE.suitable(url)
+ else super(NRKTVSeasonIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ series = self._extract_series(webpage, display_id)
+
+ season = next(
+ s for s in series['seasons']
+ if int(display_id) == s.get('seasonNumber'))
+
+ title = try_get(season, lambda x: x['titles']['title'], compat_str)
+ return self.playlist_result(
+ self._extract_episodes(season), display_id, title)
+
+
+class NRKTVSeriesIE(NRKTVSerieBaseIE):
+ _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)'
+ _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/serie/blank',
+ 'info_dict': {
+ 'id': 'blank',
+ 'title': 'Blank',
+ 'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e',
+ },
+ 'playlist_mincount': 30,
+ }, {
+ # new layout, seasons
+ 'url': 'https://tv.nrk.no/serie/backstage',
+ 'info_dict': {
+ 'id': 'backstage',
+ 'title': 'Backstage',
+ 'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3',
+ },
+ 'playlist_mincount': 60,
+ }, {
+ # new layout, instalments
+ 'url': 'https://tv.nrk.no/serie/groenn-glede',
+ 'info_dict': {
+ 'id': 'groenn-glede',
+ 'title': 'Grønn glede',
+ 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608',
+ },
+ 'playlist_mincount': 10,
+ }, {
+ # old layout
+ 'url': 'https://tv.nrksuper.no/serie/labyrint',
+ 'info_dict': {
+ 'id': 'labyrint',
+ 'title': 'Labyrint',
+ 'description': 'md5:318b597330fdac5959247c9b69fdb1ec',
+ },
+ 'playlist_mincount': 3,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/broedrene-dal-og-spektralsteinene',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/saving-the-human-race',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tv.nrk.no/serie/postmann-pat',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (
+ False if any(ie.suitable(url)
+ for ie in (NRKTVIE, NRKTVEpisodeIE, NRKTVSeasonIE))
+ else super(NRKTVSeriesIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ series_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, series_id)
+
+ # New layout (e.g. https://tv.nrk.no/serie/backstage)
+ series = self._extract_series(webpage, series_id, fatal=False)
+ if series:
+ title = try_get(series, lambda x: x['titles']['title'], compat_str)
+ description = try_get(
+ series, lambda x: x['titles']['subtitle'], compat_str)
+ entries = []
+ entries.extend(self._extract_seasons(series.get('seasons')))
+ entries.extend(self._extract_entries(series.get('instalments')))
+ entries.extend(self._extract_episodes(series.get('extraMaterial')))
+ return self.playlist_result(entries, series_id, title, description)
+
+ # Old layout (e.g. https://tv.nrksuper.no/serie/labyrint)
+ entries = [
+ self.url_result(
+ 'https://tv.nrk.no/program/Episodes/{series}/{season}'.format(
+ series=series_id, season=season_id))
+ for season_id in re.findall(self._ITEM_RE, webpage)
+ ]
+
+ title = self._html_search_meta(
+ 'seriestitle', webpage,
+ 'title', default=None) or self._og_search_title(
+ webpage, fatal=False)
+ if title:
+ title = self._search_regex(
+ r'NRK (?:Super )?TV\s*[-–]\s*(.+)', title, 'title', default=title)
+
+ description = self._html_search_meta(
+ 'series_description', webpage,
+ 'description', default=None) or self._og_search_description(webpage)
+
+ return self.playlist_result(entries, series_id, title, description)
+
+
+class NRKTVDirekteIE(NRKTVIE):
+ IE_DESC = 'NRK TV Direkte and NRK Radio Direkte'
+ _VALID_URL = r'https?://(?:tv|radio)\.nrk\.no/direkte/(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/direkte/nrk1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://radio.nrk.no/direkte/p1_oslo_akershus',
+ 'only_matching': True,
+ }]
+
+
+class NRKPlaylistBaseIE(InfoExtractor):
+ def _extract_description(self, webpage):
+ pass
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = [
+ self.url_result('nrk:%s' % video_id, NRKIE.ie_key())
+ for video_id in re.findall(self._ITEM_RE, webpage)
+ ]
+
+        playlist_title = self._extract_title(webpage)
+ playlist_description = self._extract_description(webpage)
+
+ return self.playlist_result(
+ entries, playlist_id, playlist_title, playlist_description)
+
+
+class NRKPlaylistIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/(?!video|skole)(?:[^/]+/)+(?P<id>[^/]+)'
+ _ITEM_RE = r'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"'
+ _TESTS = [{
+ 'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'info_dict': {
+ 'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763',
+ 'title': 'Gjenopplev den historiske solformørkelsen',
+ 'description': 'md5:c2df8ea3bac5654a26fc2834a542feed',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449',
+ 'info_dict': {
+ 'id': 'rivertonprisen-til-karin-fossum-1.12266449',
+ 'title': 'Rivertonprisen til Karin Fossum',
+ 'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.',
+ },
+ 'playlist_count': 2,
+ }]
+
+ def _extract_title(self, webpage):
+ return self._og_search_title(webpage, fatal=False)
+
+ def _extract_description(self, webpage):
+ return self._og_search_description(webpage)
+
+
+class NRKTVEpisodesIE(NRKPlaylistBaseIE):
+ _VALID_URL = r'https?://tv\.nrk\.no/program/[Ee]pisodes/[^/]+/(?P<id>\d+)'
+ _ITEM_RE = r'data-episode=["\']%s' % NRKTVIE._EPISODE_RE
+ _TESTS = [{
+ 'url': 'https://tv.nrk.no/program/episodes/nytt-paa-nytt/69031',
+ 'info_dict': {
+ 'id': '69031',
+ 'title': 'Nytt på nytt, sesong: 201210',
+ },
+ 'playlist_count': 4,
+ }]
+
+ def _extract_title(self, webpage):
+ return self._html_search_regex(
+ r'<h1>([^<]+)</h1>', webpage, 'title', fatal=False)
+
+
+class NRKSkoleIE(InfoExtractor):
+ IE_DESC = 'NRK Skole'
+ _VALID_URL = r'https?://(?:www\.)?nrk\.no/skole/?\?.*\bmediaId=(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://www.nrk.no/skole/?page=search&q=&mediaId=14099',
+ 'md5': '18c12c3d071953c3bf8d54ef6b2587b7',
+ 'info_dict': {
+ 'id': '6021',
+ 'ext': 'mp4',
+ 'title': 'Genetikk og eneggede tvillinger',
+ 'description': 'md5:3aca25dcf38ec30f0363428d2b265f8d',
+ 'duration': 399,
+ },
+ }, {
+ 'url': 'https://www.nrk.no/skole/?page=objectives&subject=naturfag&objective=K15114&mediaId=19355',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://mimir.nrk.no/plugin/1.0/static?mediaId=%s' % video_id,
+ video_id)
+
+ nrk_id = self._parse_json(
+ self._search_regex(
+ r'<script[^>]+type=["\']application/json["\'][^>]*>({.+?})</script>',
+ webpage, 'application json'),
+ video_id)['activeMedia']['psId']
+
+ return self.url_result('nrk:%s' % nrk_id)
diff --git a/youtube_dlc/extractor/nrl.py b/youtube_dlc/extractor/nrl.py
new file mode 100644
index 000000000..22a2df8d3
--- /dev/null
+++ b/youtube_dlc/extractor/nrl.py
@@ -0,0 +1,30 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class NRLTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?nrl\.com/tv(/[^/]+)*/(?P<id>[^/?&#]+)'
+ _TEST = {
+ 'url': 'https://www.nrl.com/tv/news/match-highlights-titans-v-knights-862805/',
+ 'info_dict': {
+ 'id': 'YyNnFuaDE6kPJqlDhG4CGQ_w89mKTau4',
+ 'ext': 'mp4',
+ 'title': 'Match Highlights: Titans v Knights',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ q_data = self._parse_json(self._html_search_regex(
+ r'(?s)q-data="({.+?})"', webpage, 'player data'), display_id)
+ ooyala_id = q_data['videoId']
+ return self.url_result(
+ 'ooyala:' + ooyala_id, 'Ooyala', ooyala_id, q_data.get('title'))
diff --git a/youtube_dl/extractor/ntvcojp.py b/youtube_dlc/extractor/ntvcojp.py
index 0c8221b22..0c8221b22 100644
--- a/youtube_dl/extractor/ntvcojp.py
+++ b/youtube_dlc/extractor/ntvcojp.py
diff --git a/youtube_dl/extractor/ntvde.py b/youtube_dlc/extractor/ntvde.py
index 101a5374c..101a5374c 100644
--- a/youtube_dl/extractor/ntvde.py
+++ b/youtube_dlc/extractor/ntvde.py
diff --git a/youtube_dlc/extractor/ntvru.py b/youtube_dlc/extractor/ntvru.py
new file mode 100644
index 000000000..c47d1dfa4
--- /dev/null
+++ b/youtube_dlc/extractor/ntvru.py
@@ -0,0 +1,131 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ strip_or_none,
+ unescapeHTML,
+ xpath_text,
+)
+
+
+class NTVRuIE(InfoExtractor):
+ IE_NAME = 'ntv.ru'
+ _VALID_URL = r'https?://(?:www\.)?ntv\.ru/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+
+ _TESTS = [{
+ 'url': 'http://www.ntv.ru/novosti/863142/',
+ 'md5': 'ba7ea172a91cb83eb734cad18c10e723',
+ 'info_dict': {
+ 'id': '746000',
+ 'ext': 'mp4',
+ 'title': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+ 'description': 'Командующий Черноморским флотом провел переговоры в штабе ВМС Украины',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 136,
+ },
+ }, {
+ 'url': 'http://www.ntv.ru/video/novosti/750370/',
+ 'md5': 'adecff79691b4d71e25220a191477124',
+ 'info_dict': {
+ 'id': '750370',
+ 'ext': 'mp4',
+ 'title': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+ 'description': 'Родные пассажиров пропавшего Boeing не верят в трагический исход',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 172,
+ },
+ }, {
+ 'url': 'http://www.ntv.ru/peredacha/segodnya/m23700/o232416',
+ 'md5': '82dbd49b38e3af1d00df16acbeab260c',
+ 'info_dict': {
+ 'id': '747480',
+ 'ext': 'mp4',
+ 'title': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'description': '«Сегодня». 21 марта 2014 года. 16:00',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 1496,
+ },
+ }, {
+ 'url': 'https://www.ntv.ru/kino/Koma_film/m70281/o336036/video/',
+ 'md5': 'e9c7cde24d9d3eaed545911a04e6d4f4',
+ 'info_dict': {
+ 'id': '1126480',
+ 'ext': 'mp4',
+ 'title': 'Остросюжетный фильм «Кома»',
+ 'description': 'Остросюжетный фильм «Кома»',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 5592,
+ },
+ }, {
+ 'url': 'http://www.ntv.ru/serial/Delo_vrachey/m31760/o233916/',
+ 'md5': '9320cd0e23f3ea59c330dc744e06ff3b',
+ 'info_dict': {
+ 'id': '751482',
+ 'ext': 'mp4',
+ 'title': '«Дело врачей»: «Деревце жизни»',
+ 'description': '«Дело врачей»: «Деревце жизни»',
+ 'thumbnail': r're:^http://.*\.jpg',
+ 'duration': 2590,
+ },
+ }, {
+ # Schemeless file URL
+ 'url': 'https://www.ntv.ru/video/1797442',
+ 'only_matching': True,
+ }]
+
+ _VIDEO_ID_REGEXES = [
+ r'<meta property="og:url" content="http://www\.ntv\.ru/video/(\d+)',
+ r'<video embed=[^>]+><id>(\d+)</id>',
+ r'<video restriction[^>]+><key>(\d+)</key>',
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._og_search_property(
+ ('video', 'video:iframe'), webpage, default=None)
+ if video_url:
+ video_id = self._search_regex(
+ r'https?://(?:www\.)?ntv\.ru/video/(?:embed/)?(\d+)',
+ video_url, 'video id', default=None)
+
+ if not video_id:
+ video_id = self._html_search_regex(
+ self._VIDEO_ID_REGEXES, webpage, 'video id')
+
+ player = self._download_xml(
+ 'http://www.ntv.ru/vi%s/' % video_id,
+ video_id, 'Downloading video XML')
+
+ title = strip_or_none(unescapeHTML(xpath_text(player, './data/title', 'title', fatal=True)))
+
+ video = player.find('./data/video')
+
+ formats = []
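+ # The player XML exposes sibling elements per quality -- file/size,
+ # hifile/hisize, webmfile/webmsize -- so the empty prefix selects
+ # the default rendition.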
+ for format_id in ['', 'hi', 'webm']:
+ file_ = xpath_text(video, './%sfile' % format_id)
+ if not file_:
+ continue
+ if file_.startswith('//'):
+ file_ = self._proto_relative_url(file_)
+ elif not file_.startswith('http'):
+ file_ = 'http://media.ntv.ru/vod/' + file_
+ formats.append({
+ 'url': file_,
+ 'filesize': int_or_none(xpath_text(video, './%ssize' % format_id)),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': xpath_text(video, './id'),
+ 'title': title,
+ 'description': strip_or_none(unescapeHTML(xpath_text(player, './data/description'))),
+ 'thumbnail': xpath_text(video, './splash'),
+ 'duration': int_or_none(xpath_text(video, './totaltime')),
+ 'view_count': int_or_none(xpath_text(video, './views')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/nuevo.py b/youtube_dlc/extractor/nuevo.py
index be1e09d37..be1e09d37 100644
--- a/youtube_dl/extractor/nuevo.py
+++ b/youtube_dlc/extractor/nuevo.py
diff --git a/youtube_dl/extractor/nuvid.py b/youtube_dlc/extractor/nuvid.py
index ab6bfcd7f..ab6bfcd7f 100644
--- a/youtube_dl/extractor/nuvid.py
+++ b/youtube_dlc/extractor/nuvid.py
diff --git a/youtube_dlc/extractor/nytimes.py b/youtube_dlc/extractor/nytimes.py
new file mode 100644
index 000000000..fc78ca56c
--- /dev/null
+++ b/youtube_dlc/extractor/nytimes.py
@@ -0,0 +1,223 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hmac
+import hashlib
+import base64
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ mimetype2ext,
+ parse_iso8601,
+ remove_start,
+)
+
+
+class NYTimesBaseIE(InfoExtractor):
+ _SECRET = b'pX(2MbU2);4N{7J8)>YwKRJ+/pQ3JkiU2Q^V>mFYv6g6gYvt6v'
+
+ def _extract_video_from_id(self, video_id):
+ # Authorization generation algorithm is reverse engineered from `signer` in
+ # http://graphics8.nytimes.com/video/vhs/vhs-2.x.min.js
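+ # Sketch of the scheme, as implemented below: the signed message is
+ # the API path with ':vhs' appended, its hex SHA-512 HMAC digest
+ # under _SECRET is base64-encoded, and the result is sent as
+ # 'Authorization: NYTV <token>' together with an 'X-NYTV: vhs' header.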
+ path = '/svc/video/api/v3/video/' + video_id
+ hm = hmac.new(self._SECRET, (path + ':vhs').encode(), hashlib.sha512).hexdigest()
+ video_data = self._download_json('http://www.nytimes.com' + path, video_id, 'Downloading video JSON', headers={
+ 'Authorization': 'NYTV ' + base64.b64encode(hm.encode()).decode(),
+ 'X-NYTV': 'vhs',
+ }, fatal=False)
+ if not video_data:
+ video_data = self._download_json(
+ 'http://www.nytimes.com/svc/video/api/v2/video/' + video_id,
+ video_id, 'Downloading video JSON')
+
+ title = video_data['headline']
+
+ def get_file_size(file_size):
+ if isinstance(file_size, int):
+ return file_size
+ elif isinstance(file_size, dict):
+ return int(file_size.get('value', 0))
+ else:
+ return None
+
+ urls = []
+ formats = []
+ for video in video_data.get('renditions', []):
+ video_url = video.get('url')
+ format_id = video.get('type')
+ if not video_url or format_id == 'thumbs' or video_url in urls:
+ continue
+ urls.append(video_url)
+ ext = mimetype2ext(video.get('mimetype')) or determine_ext(video_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id or 'hls', fatal=False))
+ elif ext == 'mpd':
+ # MPD renditions are skipped; the commented-out call below shows
+ # the would-be DASH extraction.
+ # formats.extend(self._extract_mpd_formats(
+ # video_url, video_id, format_id or 'dash', fatal=False))
+ continue
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'vcodec': video.get('videoencoding') or video.get('video_codec'),
+ 'width': int_or_none(video.get('width')),
+ 'height': int_or_none(video.get('height')),
+ 'filesize': get_file_size(video.get('file_size') or video.get('fileSize')),
+ 'tbr': int_or_none(video.get('bitrate'), 1000) or None,
+ 'ext': ext,
+ })
+ self._sort_formats(formats, ('height', 'width', 'filesize', 'tbr', 'fps', 'format_id'))
+
+ thumbnails = []
+ for image in video_data.get('images', []):
+ image_url = image.get('url')
+ if not image_url:
+ continue
+ thumbnails.append({
+ 'url': 'http://www.nytimes.com/' + image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ publication_date = video_data.get('publication_date')
+ timestamp = parse_iso8601(publication_date[:-8]) if publication_date else None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video_data.get('summary'),
+ 'timestamp': timestamp,
+ 'uploader': video_data.get('byline'),
+ 'duration': float_or_none(video_data.get('duration'), 1000),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+
+class NYTimesIE(NYTimesBaseIE):
+ _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
+ 'md5': 'd665342765db043f7e225cff19df0f2d',
+ 'info_dict': {
+ 'id': '100000002847155',
+ 'ext': 'mov',
+ 'title': 'Verbatim: What Is a Photocopier?',
+ 'description': 'md5:93603dada88ddbda9395632fdc5da260',
+ 'timestamp': 1398631707,
+ 'upload_date': '20140427',
+ 'uploader': 'Brett Weiner',
+ 'duration': 419,
+ }
+ }, {
+ 'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ return self._extract_video_from_id(video_id)
+
+
+class NYTimesArticleIE(NYTimesBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
+ _TESTS = [{
+ 'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
+ 'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
+ 'info_dict': {
+ 'id': '100000003628438',
+ 'ext': 'mov',
+ 'title': 'New Minimum Wage: $70,000 a Year',
+ 'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
+ 'timestamp': 1429033037,
+ 'upload_date': '20150414',
+ 'uploader': 'Matthew Williams',
+ }
+ }, {
+ 'url': 'http://www.nytimes.com/2016/10/14/podcasts/revelations-from-the-final-weeks.html',
+ 'md5': 'e0d52040cafb07662acf3c9132db3575',
+ 'info_dict': {
+ 'id': '100000004709062',
+ 'title': 'The Run-Up: ‘He Was Like an Octopus’',
+ 'ext': 'mp3',
+ 'description': 'md5:fb5c6b93b12efc51649b4847fe066ee4',
+ 'series': 'The Run-Up',
+ 'episode': '‘He Was Like an Octopus’',
+ 'episode_number': 20,
+ 'duration': 2130,
+ }
+ }, {
+ 'url': 'http://www.nytimes.com/2016/10/16/books/review/inside-the-new-york-times-book-review-the-rise-of-hitler.html',
+ 'info_dict': {
+ 'id': '100000004709479',
+ 'title': 'The Rise of Hitler',
+ 'ext': 'mp3',
+ 'description': 'md5:bce877fd9e3444990cb141875fab0028',
+ 'creator': 'Pamela Paul',
+ 'duration': 3475,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
+ 'only_matching': True,
+ }]
+
+ def _extract_podcast_from_json(self, json, page_id, webpage):
+ podcast_audio = self._parse_json(
+ json, page_id, transform_source=js_to_json)
+
+ audio_data = podcast_audio['data']
+ track = audio_data['track']
+
+ episode_title = track['title']
+ video_url = track['source']
+
+ description = track.get('description') or self._html_search_meta(
+ ['og:description', 'twitter:description'], webpage)
+
+ podcast_title = audio_data.get('podcast', {}).get('title')
+ title = ('%s: %s' % (podcast_title, episode_title)
+ if podcast_title else episode_title)
+
+ episode = audio_data.get('podcast', {}).get('episode') or ''
+ episode_number = int_or_none(self._search_regex(
+ r'[Ee]pisode\s+(\d+)', episode, 'episode number', default=None))
+
+ return {
+ 'id': remove_start(podcast_audio.get('target'), 'FT') or page_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'creator': track.get('credit'),
+ 'series': podcast_title,
+ 'episode': episode_title,
+ 'episode_number': episode_number,
+ 'duration': int_or_none(track.get('duration')),
+ }
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, page_id)
+
+ video_id = self._search_regex(
+ r'data-videoid=["\'](\d+)', webpage, 'video id',
+ default=None, fatal=False)
+ if video_id is not None:
+ return self._extract_video_from_id(video_id)
+
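+ # Podcast pages carry their player payload in an
+ # NYTD.FlexTypes.push({...}) call; fall back to parsing it when no
+ # data-videoid attribute is present.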
+ podcast_data = self._search_regex(
+ (r'NYTD\.FlexTypes\.push\s*\(\s*({.+?})\s*\)\s*;\s*</script',
+ r'NYTD\.FlexTypes\.push\s*\(\s*({.+})\s*\)\s*;'),
+ webpage, 'podcast data')
+ return self._extract_podcast_from_json(podcast_data, page_id, webpage)
diff --git a/youtube_dl/extractor/nzz.py b/youtube_dlc/extractor/nzz.py
index 61ee77adb..61ee77adb 100644
--- a/youtube_dl/extractor/nzz.py
+++ b/youtube_dlc/extractor/nzz.py
diff --git a/youtube_dl/extractor/odatv.py b/youtube_dlc/extractor/odatv.py
index 314527f98..314527f98 100644
--- a/youtube_dl/extractor/odatv.py
+++ b/youtube_dlc/extractor/odatv.py
diff --git a/youtube_dlc/extractor/odnoklassniki.py b/youtube_dlc/extractor/odnoklassniki.py
new file mode 100644
index 000000000..7ed9fac55
--- /dev/null
+++ b/youtube_dlc/extractor/odnoklassniki.py
@@ -0,0 +1,268 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_etree_fromstring,
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ unified_strdate,
+ int_or_none,
+ qualities,
+ unescapeHTML,
+ urlencode_postdata,
+)
+
+
+class OdnoklassnikiIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m|mobile)\.)?
+ (?:odnoklassniki|ok)\.ru/
+ (?:
+ video(?:embed)?/|
+ web-api/video/moviePlayer/|
+ live/|
+ dk\?.*?st\.mvId=
+ )
+ (?P<id>[\d-]+)
+ '''
+ _TESTS = [{
+ # metadata in JSON
+ 'url': 'http://ok.ru/video/20079905452',
+ 'md5': '0b62089b479e06681abaaca9d204f152',
+ 'info_dict': {
+ 'id': '20079905452',
+ 'ext': 'mp4',
+ 'title': 'Культура меняет нас (прекрасный ролик!))',
+ 'duration': 100,
+ 'upload_date': '20141207',
+ 'uploader_id': '330537914540',
+ 'uploader': 'Виталий Добровольский',
+ 'like_count': int,
+ 'age_limit': 0,
+ },
+ }, {
+ # metadataUrl
+ 'url': 'http://ok.ru/video/63567059965189-0?fromTime=5',
+ 'md5': '6ff470ea2dd51d5d18c295a355b0b6bc',
+ 'info_dict': {
+ 'id': '63567059965189-0',
+ 'ext': 'mp4',
+ 'title': 'Девушка без комплексов ...',
+ 'duration': 191,
+ 'upload_date': '20150518',
+ 'uploader_id': '534380003155',
+ 'uploader': '☭ Андрей Мещанинов ☭',
+ 'like_count': int,
+ 'age_limit': 0,
+ 'start_time': 5,
+ },
+ }, {
+ # YouTube embed (metadataUrl, provider == USER_YOUTUBE)
+ 'url': 'http://ok.ru/video/64211978996595-1',
+ 'md5': '2f206894ffb5dbfcce2c5a14b909eea5',
+ 'info_dict': {
+ 'id': 'V_VztHT5BzY',
+ 'ext': 'mp4',
+ 'title': 'Космическая среда от 26 августа 2015',
+ 'description': 'md5:848eb8b85e5e3471a3a803dae1343ed0',
+ 'duration': 440,
+ 'upload_date': '20150826',
+ 'uploader_id': 'tvroscosmos',
+ 'uploader': 'Телестудия Роскосмоса',
+ 'age_limit': 0,
+ },
+ }, {
+ # YouTube embed (metadata, provider == USER_YOUTUBE, no metadata.movie.title field)
+ 'url': 'http://ok.ru/video/62036049272859-0',
+ 'info_dict': {
+ 'id': '62036049272859-0',
+ 'ext': 'mp4',
+ 'title': 'МУЗЫКА ДОЖДЯ .',
+ 'description': 'md5:6f1867132bd96e33bf53eda1091e8ed0',
+ 'upload_date': '20120106',
+ 'uploader_id': '473534735899',
+ 'uploader': 'МARINA D',
+ 'age_limit': 0,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'Video has not been found',
+ }, {
+ 'url': 'http://ok.ru/web-api/video/moviePlayer/20079905452',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ok.ru/video/20648036891',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.ok.ru/videoembed/20648036891',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.ok.ru/video/20079905452',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mobile.ok.ru/video/20079905452',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.ok.ru/live/484531969818',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.ok.ru/dk?st.cmd=movieLayer&st.discId=863789452017&st.retLoc=friend&st.rtu=%2Fdk%3Fst.cmd%3DfriendMovies%26st.mode%3Down%26st.mrkId%3D%257B%2522uploadedMovieMarker%2522%253A%257B%2522marker%2522%253A%25221519410114503%2522%252C%2522hasMore%2522%253Atrue%257D%252C%2522sharedMovieMarker%2522%253A%257B%2522marker%2522%253Anull%252C%2522hasMore%2522%253Afalse%257D%257D%26st.friendId%3D561722190321%26st.frwd%3Don%26_prevCmd%3DfriendMovies%26tkn%3D7257&st.discType=MOVIE&st.mvId=863789452017&_prevCmd=friendMovies&tkn=3648#lst#',
+ 'only_matching': True,
+ }, {
+ # Paid video
+ 'url': 'https://ok.ru/video/954886983203',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:odnoklassniki|ok)\.ru/videoembed/.+?)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ start_time = int_or_none(compat_parse_qs(
+ compat_urllib_parse_urlparse(url).query).get('fromTime', [None])[0])
+
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://ok.ru/video/%s' % video_id, video_id)
+
+ error = self._search_regex(
+ r'[^>]+class="vp_video_stub_txt"[^>]*>([^<]+)<',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ player = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'data-options=(?P<quote>["\'])(?P<player>{.+?%s.+?})(?P=quote)' % video_id,
+ webpage, 'player', group='player')),
+ video_id)
+
+ flashvars = player['flashvars']
+
+ metadata = flashvars.get('metadata')
+ if metadata:
+ metadata = self._parse_json(metadata, video_id)
+ else:
+ data = {}
+ st_location = flashvars.get('location')
+ if st_location:
+ data['st.location'] = st_location
+ metadata = self._download_json(
+ compat_urllib_parse_unquote(flashvars['metadataUrl']),
+ video_id, 'Downloading metadata JSON',
+ data=urlencode_postdata(data))
+
+ movie = metadata['movie']
+
+ # Some embedded videos lack a title in the movie dict (e.g.
+ # http://ok.ru/video/62036049272859-0), so a missing title is
+ # tolerated here; it will be extracted later by the extractor that
+ # processes the actual embed.
+ provider = metadata.get('provider')
+ title = movie['title'] if provider == 'UPLOADED_ODKL' else movie.get('title')
+
+ thumbnail = movie.get('poster')
+ duration = int_or_none(movie.get('duration'))
+
+ author = metadata.get('author', {})
+ uploader_id = author.get('id')
+ uploader = author.get('name')
+
+ upload_date = unified_strdate(self._html_search_meta(
+ 'ya:ovs:upload_date', webpage, 'upload date', default=None))
+
+ age_limit = None
+ adult = self._html_search_meta(
+ 'ya:ovs:adult', webpage, 'age limit', default=None)
+ if adult:
+ age_limit = 18 if adult == 'true' else 0
+
+ like_count = int_or_none(metadata.get('likeCount'))
+
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'like_count': like_count,
+ 'age_limit': age_limit,
+ 'start_time': start_time,
+ }
+
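+ # For YouTube re-uploads contentId points at the original YouTube
+ # video; url_transparent lets the metadata gathered above be merged
+ # with whatever the delegated extractor returns.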
+ if provider == 'USER_YOUTUBE':
+ info.update({
+ '_type': 'url_transparent',
+ 'url': movie['contentId'],
+ })
+ return info
+
+ assert title
+ if provider == 'LIVE_TV_APP':
+ info['title'] = self._live_title(title)
+
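+ # qualities() ranks ids by their position in the tuple, so a format
+ # of type '5' is preferred most and type '4' least.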
+ quality = qualities(('4', '0', '1', '2', '3', '5'))
+
+ formats = [{
+ 'url': f['url'],
+ 'ext': 'mp4',
+ 'format_id': f['name'],
+ } for f in metadata['videos']]
+
+ m3u8_url = metadata.get('hlsManifestUrl')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+
+ dash_manifest = metadata.get('metadataEmbedded')
+ if dash_manifest:
+ formats.extend(self._parse_mpd_formats(
+ compat_etree_fromstring(dash_manifest), 'mpd'))
+
+ for fmt in formats:
+ fmt_type = self._search_regex(
+ r'\btype[/=](\d)', fmt['url'],
+ 'format type', default=None)
+ if fmt_type:
+ fmt['quality'] = quality(fmt_type)
+
+ # Live formats
+ m3u8_url = metadata.get('hlsMasterPlaylistUrl')
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8',
+ m3u8_id='hls', fatal=False))
+ rtmp_url = metadata.get('rtmpUrl')
+ if rtmp_url:
+ formats.append({
+ 'url': rtmp_url,
+ 'format_id': 'rtmp',
+ 'ext': 'flv',
+ })
+
+ if not formats:
+ payment_info = metadata.get('paymentInfo')
+ if payment_info:
+ raise ExtractorError('This video is paid, subscribe to download it', expected=True)
+
+ self._sort_formats(formats)
+
+ info['formats'] = formats
+ return info
diff --git a/youtube_dl/extractor/oktoberfesttv.py b/youtube_dlc/extractor/oktoberfesttv.py
index a914068f9..a914068f9 100644
--- a/youtube_dl/extractor/oktoberfesttv.py
+++ b/youtube_dlc/extractor/oktoberfesttv.py
diff --git a/youtube_dl/extractor/once.py b/youtube_dlc/extractor/once.py
index 3e44b7829..3e44b7829 100644
--- a/youtube_dl/extractor/once.py
+++ b/youtube_dlc/extractor/once.py
diff --git a/youtube_dl/extractor/ondemandkorea.py b/youtube_dlc/extractor/ondemandkorea.py
index df1ce3c1d..df1ce3c1d 100644
--- a/youtube_dl/extractor/ondemandkorea.py
+++ b/youtube_dlc/extractor/ondemandkorea.py
diff --git a/youtube_dlc/extractor/onet.py b/youtube_dlc/extractor/onet.py
new file mode 100644
index 000000000..e55b2ac89
--- /dev/null
+++ b/youtube_dlc/extractor/onet.py
@@ -0,0 +1,268 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ get_element_by_class,
+ int_or_none,
+ js_to_json,
+ NO_DEFAULT,
+ parse_iso8601,
+ remove_start,
+ strip_or_none,
+ url_basename,
+)
+
+
+class OnetBaseIE(InfoExtractor):
+ _URL_BASE_RE = r'https?://(?:(?:www\.)?onet\.tv|onet100\.vod\.pl)/[a-z]/'
+
+ def _search_mvp_id(self, webpage):
+ return self._search_regex(
+ r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
+
+ def _extract_from_id(self, video_id, webpage=None):
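+ # The qi.ckm.onetapi.pl endpoint takes a JSON-RPC 2.0 style request
+ # flattened into body[...] form parameters and answers with a plain
+ # JSON-RPC response (hence the 'error'/'result' handling below).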
+ response = self._download_json(
+ 'http://qi.ckm.onetapi.pl/', video_id,
+ query={
+ 'body[id]': video_id,
+ 'body[jsonrpc]': '2.0',
+ 'body[method]': 'get_asset_detail',
+ 'body[params][ID_Publikacji]': video_id,
+ 'body[params][Service]': 'www.onet.pl',
+ 'content-type': 'application/jsonp',
+ 'x-onet-app': 'player.front.onetapi.pl',
+ })
+
+ error = response.get('error')
+ if error:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error['message']), expected=True)
+
+ video = response['result'].get('0')
+
+ formats = []
+ for format_type, formats_dict in video['formats'].items():
+ if not isinstance(formats_dict, dict):
+ continue
+ for format_id, format_list in formats_dict.items():
+ if not isinstance(format_list, list):
+ continue
+ for f in format_list:
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if format_id.startswith('ism'):
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, 'mss', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ elif format_id.startswith('hls'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ http_f = {
+ 'url': video_url,
+ 'format_id': format_id,
+ 'abr': float_or_none(f.get('audio_bitrate')),
+ }
+ if format_type == 'audio':
+ http_f['vcodec'] = 'none'
+ else:
+ http_f.update({
+ 'height': int_or_none(f.get('vertical_resolution')),
+ 'width': int_or_none(f.get('horizontal_resolution')),
+ 'vbr': float_or_none(f.get('video_bitrate')),
+ })
+ formats.append(http_f)
+ self._sort_formats(formats)
+
+ meta = video.get('meta', {})
+
+ title = (self._og_search_title(
+ webpage, default=None) if webpage else None) or meta['title']
+ description = (self._og_search_description(
+ webpage, default=None) if webpage else None) or meta.get('description')
+ duration = meta.get('length') or meta.get('lenght')  # 'lenght' [sic]: the API misspells the key
+ timestamp = parse_iso8601(meta.get('addDate'), ' ')
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'formats': formats,
+ }
+
+
+class OnetMVPIE(OnetBaseIE):
+ _VALID_URL = r'onetmvp:(?P<id>\d+\.\d+)'
+
+ _TEST = {
+ 'url': 'onetmvp:381027.1509591944',
+ 'only_matching': True,
+ }
+
+ def _real_extract(self, url):
+ return self._extract_from_id(self._match_id(url))
+
+
+class OnetIE(OnetBaseIE):
+ _VALID_URL = OnetBaseIE._URL_BASE_RE + r'[a-z]+/(?P<display_id>[0-9a-z-]+)/(?P<id>[0-9a-z]+)'
+ IE_NAME = 'onet.tv'
+
+ _TESTS = [{
+ 'url': 'http://onet.tv/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
+ 'md5': '436102770fb095c75b8bb0392d3da9ff',
+ 'info_dict': {
+ 'id': 'qbpyqc',
+ 'display_id': 'open-er-festival-2016-najdziwniejsze-wymagania-gwiazd',
+ 'ext': 'mp4',
+ 'title': 'Open\'er Festival 2016: najdziwniejsze wymagania gwiazd',
+ 'description': 'Trzy samochody, których nigdy nie użyto, prywatne spa, hotel dekorowany czarnym suknem czy nielegalne używki. Organizatorzy koncertów i festiwali muszą stawać przed nie lada wyzwaniem zapraszając gwia...',
+ 'upload_date': '20160705',
+ 'timestamp': 1467721580,
+ },
+ }, {
+ 'url': 'https://onet100.vod.pl/k/openerfestival/open-er-festival-2016-najdziwniejsze-wymagania-gwiazd/qbpyqc',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id, video_id = mobj.group('display_id', 'id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ mvp_id = self._search_mvp_id(webpage)
+
+ info_dict = self._extract_from_id(mvp_id, webpage)
+ info_dict.update({
+ 'id': video_id,
+ 'display_id': display_id,
+ })
+
+ return info_dict
+
+
+class OnetChannelIE(OnetBaseIE):
+ _VALID_URL = OnetBaseIE._URL_BASE_RE + r'(?P<id>[a-z]+)(?:[?#]|$)'
+ IE_NAME = 'onet.tv:channel'
+
+ _TESTS = [{
+ 'url': 'http://onet.tv/k/openerfestival',
+ 'info_dict': {
+ 'id': 'openerfestival',
+ 'title': "Open'er Festival",
+ 'description': "Tak było na Open'er Festival 2016! Oglądaj nasze reportaże i wywiady z artystami.",
+ },
+ 'playlist_mincount': 35,
+ }, {
+ 'url': 'https://onet100.vod.pl/k/openerfestival',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, channel_id)
+
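+ # currentClip is a JS object literal that may contain string
+ # concatenations ('foo' + 'bar'); the re.sub below joins those
+ # before js_to_json turns the literal into valid JSON.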
+ current_clip_info = self._parse_json(self._search_regex(
+ r'var\s+currentClip\s*=\s*({[^}]+})', webpage, 'video info'), channel_id,
+ transform_source=lambda s: js_to_json(re.sub(r'\'\s*\+\s*\'', '', s)))
+ video_id = remove_start(current_clip_info['ckmId'], 'mvp:')
+ video_name = url_basename(current_clip_info['url'])
+
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen(
+ 'Downloading just video %s because of --no-playlist' % video_name)
+ return self._extract_from_id(video_id, webpage)
+
+ self.to_screen(
+ 'Downloading channel %s - add --no-playlist to just download video %s' % (
+ channel_id, video_name))
+ matches = re.findall(
+ r'<a[^>]+href=[\'"](%s[a-z]+/[0-9a-z-]+/[0-9a-z]+)' % self._URL_BASE_RE,
+ webpage)
+ entries = [
+ self.url_result(video_link, OnetIE.ie_key())
+ for video_link in matches]
+
+ channel_title = strip_or_none(get_element_by_class('o_channelName', webpage))
+ channel_description = strip_or_none(get_element_by_class('o_channelDesc', webpage))
+ return self.playlist_result(entries, channel_id, channel_title, channel_description)
+
+
+class OnetPlIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?:onet|businessinsider\.com|plejada)\.pl/(?:[^/]+/)+(?P<id>[0-9a-z]+)'
+ IE_NAME = 'onet.pl'
+
+ _TESTS = [{
+ 'url': 'http://eurosport.onet.pl/zimowe/skoki-narciarskie/ziobro-wygral-kwalifikacje-w-pjongczangu/9ckrly',
+ 'md5': 'b94021eb56214c3969380388b6e73cb0',
+ 'info_dict': {
+ 'id': '1561707.1685479',
+ 'ext': 'mp4',
+ 'title': 'Ziobro wygrał kwalifikacje w Pjongczangu',
+ 'description': 'md5:61fb0740084d2d702ea96512a03585b4',
+ 'upload_date': '20170214',
+ 'timestamp': 1487078046,
+ },
+ }, {
+ # embedded via pulsembed
+ 'url': 'http://film.onet.pl/pensjonat-nad-rozlewiskiem-relacja-z-planu-serialu/y428n0',
+ 'info_dict': {
+ 'id': '501235.965429946',
+ 'ext': 'mp4',
+ 'title': '"Pensjonat nad rozlewiskiem": relacja z planu serialu',
+ 'upload_date': '20170622',
+ 'timestamp': 1498159955,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://film.onet.pl/zwiastuny/ghost-in-the-shell-drugi-zwiastun-pl/5q6yl3',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://moto.onet.pl/jak-wybierane-sa-miejsca-na-fotoradary/6rs04e',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://businessinsider.com.pl/wideo/scenariusz-na-koniec-swiata-wedlug-nasa/dwnqptk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://plejada.pl/weronika-rosati-o-swoim-domniemanym-slubie/n2bq89',
+ 'only_matching': True,
+ }]
+
+ def _search_mvp_id(self, webpage, default=NO_DEFAULT):
+ return self._search_regex(
+ r'data-(?:params-)?mvp=["\'](\d+\.\d+)', webpage, 'mvp id',
+ default=default)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ mvp_id = self._search_mvp_id(webpage, default=None)
+
+ if not mvp_id:
+ pulsembed_url = self._search_regex(
+ r'data-src=(["\'])(?P<url>(?:https?:)?//pulsembed\.eu/.+?)\1',
+ webpage, 'pulsembed url', group='url')
+ webpage = self._download_webpage(
+ pulsembed_url, video_id, 'Downloading pulsembed webpage')
+ mvp_id = self._search_mvp_id(webpage)
+
+ return self.url_result(
+ 'onetmvp:%s' % mvp_id, OnetMVPIE.ie_key(), video_id=mvp_id)
diff --git a/youtube_dlc/extractor/onionstudios.py b/youtube_dlc/extractor/onionstudios.py
new file mode 100644
index 000000000..cf5c39e66
--- /dev/null
+++ b/youtube_dlc/extractor/onionstudios.py
@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import js_to_json
+
+
+class OnionStudiosIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?onionstudios\.com/(?:video(?:s/[^/]+-|/)|embed\?.*\bid=)(?P<id>\d+)(?!-)'
+
+ _TESTS = [{
+ 'url': 'http://www.onionstudios.com/videos/hannibal-charges-forward-stops-for-a-cocktail-2937',
+ 'md5': '5a118d466d62b5cd03647cf2c593977f',
+ 'info_dict': {
+ 'id': '3459881',
+ 'ext': 'mp4',
+ 'title': 'Hannibal charges forward, stops for a cocktail',
+ 'description': 'md5:545299bda6abf87e5ec666548c6a9448',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'a.v. club',
+ 'upload_date': '20150619',
+ 'timestamp': 1434728546,
+ },
+ }, {
+ 'url': 'http://www.onionstudios.com/embed?id=2855&autoplay=true',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.onionstudios.com/video/6139.json',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'(?s)<(?:iframe|bulbs-video)[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?onionstudios\.com/(?:embed.+?|video/\d+\.json))\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
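+ # The embed script (a static asset, the same for every video)
+ # defines window.mcpMapping, which maps Onion Studios ids to Kinja
+ # MCP ids.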
+ webpage = self._download_webpage(
+ 'http://onionstudios.com/embed/dc94dc2899fe644c0e7241fa04c1b732.js',
+ video_id)
+ mcp_id = compat_str(self._parse_json(self._search_regex(
+ r'window\.mcpMapping\s*=\s*({.+?});', webpage,
+ 'MCP Mapping'), video_id, js_to_json)[video_id]['mcp_id'])
+ return self.url_result(
+ 'http://kinja.com/ajax/inset/iframe?id=mcp-' + mcp_id,
+ 'KinjaEmbed', mcp_id)
diff --git a/youtube_dlc/extractor/ooyala.py b/youtube_dlc/extractor/ooyala.py
new file mode 100644
index 000000000..eb957b8fe
--- /dev/null
+++ b/youtube_dlc/extractor/ooyala.py
@@ -0,0 +1,210 @@
+from __future__ import unicode_literals
+
+import base64
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ try_get,
+ unsmuggle_url,
+)
+
+
+class OoyalaBaseIE(InfoExtractor):
+ _PLAYER_BASE = 'http://player.ooyala.com/'
+ _CONTENT_TREE_BASE = _PLAYER_BASE + 'player_api/v1/content_tree/'
+ _AUTHORIZATION_URL_TEMPLATE = _PLAYER_BASE + 'sas/player_api/v2/authorization/embed_code/%s/%s'
+
+ def _extract(self, content_tree_url, video_id, domain=None, supportedformats=None, embed_token=None):
+ content_tree = self._download_json(content_tree_url, video_id)['content_tree']
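+ # content_tree is keyed by embed code; the single entry holds the
+ # video metadata.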
+ metadata = content_tree[list(content_tree)[0]]
+ embed_code = metadata['embed_code']
+ pcode = metadata.get('asset_pcode') or embed_code
+ title = metadata['title']
+
+ auth_data = self._download_json(
+ self._AUTHORIZATION_URL_TEMPLATE % (pcode, embed_code),
+ video_id, headers=self.geo_verification_headers(), query={
+ 'domain': domain or 'player.ooyala.com',
+ 'supportedFormats': supportedformats or 'mp4,rtmp,m3u8,hds,dash,smooth',
+ 'embedToken': embed_token,
+ })['authorization_data'][embed_code]
+
+ urls = []
+ formats = []
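+ # When authorization returns no streams, synthesize an HLS master
+ # playlist entry; its URL is base64-encoded so it can be handled
+ # uniformly with the regular stream records below.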
+ streams = auth_data.get('streams') or [{
+ 'delivery_type': 'hls',
+ 'url': {
+ 'data': base64.b64encode(('http://player.ooyala.com/hls/player/all/%s.m3u8' % embed_code).encode()).decode(),
+ }
+ }]
+ for stream in streams:
+ url_data = try_get(stream, lambda x: x['url']['data'], compat_str)
+ if not url_data:
+ continue
+ s_url = compat_b64decode(url_data).decode('utf-8')
+ if not s_url or s_url in urls:
+ continue
+ urls.append(s_url)
+ ext = determine_ext(s_url, None)
+ delivery_type = stream.get('delivery_type')
+ if delivery_type == 'hls' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ re.sub(r'/ip(?:ad|hone)/', '/all/', s_url), embed_code, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif delivery_type == 'hds' or ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ s_url + '?hdcore=3.7.0', embed_code, f4m_id='hds', fatal=False))
+ elif delivery_type == 'dash' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ s_url, embed_code, mpd_id='dash', fatal=False))
+ elif delivery_type == 'smooth':
+ # Collect the extracted ISM formats; dropping the return value
+ # would silently discard the smooth-streaming renditions.
+ formats.extend(self._extract_ism_formats(
+ s_url, embed_code, ism_id='mss', fatal=False))
+ elif ext == 'smil':
+ formats.extend(self._extract_smil_formats(
+ s_url, embed_code, fatal=False))
+ else:
+ formats.append({
+ 'url': s_url,
+ 'ext': ext or delivery_type,
+ 'vcodec': stream.get('video_codec'),
+ 'format_id': delivery_type,
+ 'width': int_or_none(stream.get('width')),
+ 'height': int_or_none(stream.get('height')),
+ 'abr': int_or_none(stream.get('audio_bitrate')),
+ 'vbr': int_or_none(stream.get('video_bitrate')),
+ 'fps': float_or_none(stream.get('framerate')),
+ })
+ if not formats and not auth_data.get('authorized'):
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, auth_data['message']), expected=True)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for lang, sub in metadata.get('closed_captions_vtt', {}).get('captions', {}).items():
+ sub_url = sub.get('url')
+ if not sub_url:
+ continue
+ subtitles[lang] = [{
+ 'url': sub_url,
+ }]
+
+ return {
+ 'id': embed_code,
+ 'title': title,
+ 'description': metadata.get('description'),
+ 'thumbnail': metadata.get('thumbnail_image') or metadata.get('promo_image'),
+ 'duration': float_or_none(metadata.get('duration'), 1000),
+ 'subtitles': subtitles,
+ 'formats': formats,
+ }
+
+
+class OoyalaIE(OoyalaBaseIE):
+ _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
+
+ _TESTS = [
+ {
+ # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
+ 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+ 'info_dict': {
+ 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
+ 'ext': 'mp4',
+ 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
+ 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
+ 'duration': 853.386,
+ },
+ # The video in the original webpage now uses PlayWire
+ 'skip': 'Ooyala said: movie expired',
+ }, {
+ # Only available for ipad
+ 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+ 'info_dict': {
+ 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
+ 'ext': 'mp4',
+ 'title': 'Simulation Overview - Levels of Simulation',
+ 'duration': 194.948,
+ },
+ },
+ {
+ # Information available only through SAS api
+ # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
+ 'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
+ 'md5': 'a84001441b35ea492bc03736e59e7935',
+ 'info_dict': {
+ 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
+ 'ext': 'mp4',
+ 'title': 'Divide Tool Path.mp4',
+ 'duration': 204.405,
+ }
+ },
+ {
+ # empty stream['url']['data']
+ 'url': 'http://player.ooyala.com/player.js?embedCode=w2bnZtYjE6axZ_dw1Cd0hQtXd_ige2Is',
+ 'only_matching': True,
+ }
+ ]
+
+ @staticmethod
+ def _url_for_embed_code(embed_code):
+ return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
+
+ @classmethod
+ def _build_url_result(cls, embed_code):
+ return cls.url_result(cls._url_for_embed_code(embed_code),
+ ie=cls.ie_key())
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+ embed_code = self._match_id(url)
+ domain = smuggled_data.get('domain')
+ supportedformats = smuggled_data.get('supportedformats')
+ embed_token = smuggled_data.get('embed_token')
+ content_tree_url = self._CONTENT_TREE_BASE + 'embed_code/%s/%s' % (embed_code, embed_code)
+ return self._extract(content_tree_url, embed_code, domain, supportedformats, embed_token)
+
+
+class OoyalaExternalIE(OoyalaBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ ooyalaexternal:|
+ https?://.+?\.ooyala\.com/.*?\bexternalId=
+ )
+ (?P<partner_id>[^:]+)
+ :
+ (?P<id>.+)
+ (?:
+ :|
+ .*?&pcode=
+ )
+ (?P<pcode>.+?)
+ (?:&|$)
+ '''
+
+ _TEST = {
+ 'url': 'https://player.ooyala.com/player.js?externalId=espn:10365079&pcode=1kNG061cgaoolOncv54OAO1ceO-I&adSetCode=91cDU6NuXTGKz3OdjOxFdAgJVtQcKJnI&callback=handleEvents&hasModuleParams=1&height=968&playerBrandingId=7af3bd04449c444c964f347f11873075&targetReplaceId=videoPlayer&width=1656&wmode=opaque&allowScriptAccess=always',
+ 'info_dict': {
+ 'id': 'FkYWtmazr6Ed8xmvILvKLWjd4QvYZpzG',
+ 'ext': 'mp4',
+ 'title': 'dm_140128_30for30Shorts___JudgingJewellv2',
+ 'duration': 1302.0,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ partner_id, video_id, pcode = re.match(self._VALID_URL, url).groups()
+ content_tree_url = self._CONTENT_TREE_BASE + 'external_id/%s/%s:%s' % (pcode, partner_id, video_id)
+ return self._extract(content_tree_url, video_id)
diff --git a/youtube_dlc/extractor/openload.py b/youtube_dlc/extractor/openload.py
new file mode 100644
index 000000000..0c20d0177
--- /dev/null
+++ b/youtube_dlc/extractor/openload.py
@@ -0,0 +1,238 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import os
+import subprocess
+import tempfile
+
+from ..compat import (
+ compat_urlparse,
+ compat_kwargs,
+)
+from ..utils import (
+ check_executable,
+ encodeArgument,
+ ExtractorError,
+ get_exe_version,
+ is_outdated_version,
+ std_headers,
+)
+
+
+def cookie_to_dict(cookie):
+ cookie_dict = {
+ 'name': cookie.name,
+ 'value': cookie.value,
+ }
+ if cookie.port_specified:
+ cookie_dict['port'] = cookie.port
+ if cookie.domain_specified:
+ cookie_dict['domain'] = cookie.domain
+ if cookie.path_specified:
+ cookie_dict['path'] = cookie.path
+ if cookie.expires is not None:
+ cookie_dict['expires'] = cookie.expires
+ if cookie.secure is not None:
+ cookie_dict['secure'] = cookie.secure
+ if cookie.discard is not None:
+ cookie_dict['discard'] = cookie.discard
+ try:
+ if (cookie.has_nonstandard_attr('httpOnly')
+ or cookie.has_nonstandard_attr('httponly')
+ or cookie.has_nonstandard_attr('HttpOnly')):
+ cookie_dict['httponly'] = True
+ except TypeError:
+ pass
+ return cookie_dict
+
+
+def cookie_jar_to_list(cookie_jar):
+ return [cookie_to_dict(cookie) for cookie in cookie_jar]
+
+
+class PhantomJSwrapper(object):
+ """PhantomJS wrapper class
+
+ This class is experimental.
+ """
+
+ _TEMPLATE = r'''
+ phantom.onError = function(msg, trace) {{
+ var msgStack = ['PHANTOM ERROR: ' + msg];
+ if(trace && trace.length) {{
+ msgStack.push('TRACE:');
+ trace.forEach(function(t) {{
+ msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line
+ + (t.function ? ' (in function ' + t.function +')' : ''));
+ }});
+ }}
+ console.error(msgStack.join('\n'));
+ phantom.exit(1);
+ }};
+ var page = require('webpage').create();
+ var fs = require('fs');
+ var read = {{ mode: 'r', charset: 'utf-8' }};
+ var write = {{ mode: 'w', charset: 'utf-8' }};
+ JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{
+ phantom.addCookie(x);
+ }});
+ page.settings.resourceTimeout = {timeout};
+ page.settings.userAgent = "{ua}";
+ page.onLoadStarted = function() {{
+ page.evaluate(function() {{
+ delete window._phantom;
+ delete window.callPhantom;
+ }});
+ }};
+ var saveAndExit = function() {{
+ fs.write("{html}", page.content, write);
+ fs.write("{cookies}", JSON.stringify(phantom.cookies), write);
+ phantom.exit();
+ }};
+ page.onLoadFinished = function(status) {{
+ if(page.url === "") {{
+ page.setContent(fs.read("{html}", read), "{url}");
+ }}
+ else {{
+ {jscode}
+ }}
+ }};
+ page.open("");
+ '''
+
+ _TMP_FILE_NAMES = ['script', 'html', 'cookies']
+
+ @staticmethod
+ def _version():
+ return get_exe_version('phantomjs', version_re=r'([0-9.]+)')
+
+ def __init__(self, extractor, required_version=None, timeout=10000):
+ self._TMP_FILES = {}
+
+ self.exe = check_executable('phantomjs', ['-v'])
+ if not self.exe:
+ raise ExtractorError('PhantomJS executable not found in PATH, '
+ 'download it from http://phantomjs.org',
+ expected=True)
+
+ self.extractor = extractor
+
+ if required_version:
+ version = self._version()
+ if is_outdated_version(version, required_version):
+ self.extractor._downloader.report_warning(
+ 'Your copy of PhantomJS is outdated, update it to version '
+ '%s or newer if you encounter any errors.' % required_version)
+
+ self.options = {
+ 'timeout': timeout,
+ }
+ for name in self._TMP_FILE_NAMES:
+ tmp = tempfile.NamedTemporaryFile(delete=False)
+ tmp.close()
+ self._TMP_FILES[name] = tmp
+
+ def __del__(self):
+ for name in self._TMP_FILE_NAMES:
+ try:
+ os.remove(self._TMP_FILES[name].name)
+ except (IOError, OSError, KeyError):
+ pass
+
+ def _save_cookies(self, url):
+ cookies = cookie_jar_to_list(self.extractor._downloader.cookiejar)
+ for cookie in cookies:
+ if 'path' not in cookie:
+ cookie['path'] = '/'
+ if 'domain' not in cookie:
+ cookie['domain'] = compat_urlparse.urlparse(url).netloc
+ with open(self._TMP_FILES['cookies'].name, 'wb') as f:
+ f.write(json.dumps(cookies).encode('utf-8'))
+
+ def _load_cookies(self):
+ with open(self._TMP_FILES['cookies'].name, 'rb') as f:
+ cookies = json.loads(f.read().decode('utf-8'))
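+ # Map PhantomJS cookie fields back onto the kwargs expected by
+ # InfoExtractor._set_cookie: httponly travels in the nonstandard
+ # 'rest' dict and 'expiry' becomes 'expire_time'.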
+ for cookie in cookies:
+ if cookie['httponly'] is True:
+ cookie['rest'] = {'httpOnly': None}
+ if 'expiry' in cookie:
+ cookie['expire_time'] = cookie['expiry']
+ self.extractor._set_cookie(**compat_kwargs(cookie))
+
+ def get(self, url, html=None, video_id=None, note=None, note2='Executing JS on webpage', headers={}, jscode='saveAndExit();'):
+ """
+ Downloads webpage (if needed) and executes JS
+
+ Params:
+ url: website url
+ html: optional, html code of website
+ video_id: video id
+ note: optional, displayed when downloading webpage
+ note2: optional, displayed when executing JS
+ headers: custom http headers
+ jscode: code to be executed when page is loaded
+
+ Returns tuple with:
+ * downloaded website (after JS execution)
+ * anything you print with `console.log` (but not inside `page.evaluate`!)
+
+ In most cases you don't need to add any `jscode`.
+ It is executed in `page.onLoadFinished`.
+ `saveAndExit();` is mandatory; use it instead of `phantom.exit()`.
+ It is possible to wait for some element on the webpage, for example:
+ var check = function() {
+ var elementFound = page.evaluate(function() {
+ return document.querySelector('#b.done') !== null;
+ });
+ if(elementFound)
+ saveAndExit();
+ else
+ window.setTimeout(check, 500);
+ }
+
+ page.evaluate(function(){
+ document.querySelector('#a').click();
+ });
+ check();
+ """
+ if 'saveAndExit();' not in jscode:
+ raise ExtractorError('`saveAndExit();` not found in `jscode`')
+ if not html:
+ html = self.extractor._download_webpage(url, video_id, note=note, headers=headers)
+ with open(self._TMP_FILES['html'].name, 'wb') as f:
+ f.write(html.encode('utf-8'))
+
+ self._save_cookies(url)
+
+ replaces = self.options
+ replaces['url'] = url
+ user_agent = headers.get('User-Agent') or std_headers['User-Agent']
+ replaces['ua'] = user_agent.replace('"', '\\"')
+ replaces['jscode'] = jscode
+
+ for x in self._TMP_FILE_NAMES:
+ replaces[x] = self._TMP_FILES[x].name.replace('\\', '\\\\').replace('"', '\\"')
+
+ with open(self._TMP_FILES['script'].name, 'wb') as f:
+ f.write(self._TEMPLATE.format(**replaces).encode('utf-8'))
+
+ if video_id is None:
+ self.extractor.to_screen('%s' % (note2,))
+ else:
+ self.extractor.to_screen('%s: %s' % (video_id, note2))
+
+ p = subprocess.Popen([
+ self.exe, '--ssl-protocol=any',
+ self._TMP_FILES['script'].name
+ ], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ if p.returncode != 0:
+ raise ExtractorError(
+ 'Executing JS failed:\n' + encodeArgument(err))
+ with open(self._TMP_FILES['html'].name, 'rb') as f:
+ html = f.read().decode('utf-8')
+
+ self._load_cookies()
+
+ return (html, encodeArgument(out))
diff --git a/youtube_dl/extractor/ora.py b/youtube_dlc/extractor/ora.py
index 1d42be39b..1d42be39b 100644
--- a/youtube_dl/extractor/ora.py
+++ b/youtube_dlc/extractor/ora.py
diff --git a/youtube_dlc/extractor/orf.py b/youtube_dlc/extractor/orf.py
new file mode 100644
index 000000000..700ce448c
--- /dev/null
+++ b/youtube_dlc/extractor/orf.py
@@ -0,0 +1,570 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ determine_ext,
+ float_or_none,
+ HEADRequest,
+ int_or_none,
+ orderedSet,
+ remove_end,
+ str_or_none,
+ strip_jsonp,
+ unescapeHTML,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class ORFTVthekIE(InfoExtractor):
+ IE_NAME = 'orf:tvthek'
+ IE_DESC = 'ORF TVthek'
+ _VALID_URL = r'https?://tvthek\.orf\.at/(?:[^/]+/)+(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389',
+ 'playlist': [{
+ 'md5': '2942210346ed779588f428a92db88712',
+ 'info_dict': {
+ 'id': '8896777',
+ 'ext': 'mp4',
+ 'title': 'Aufgetischt: Mit der Steirischen Tafelrunde',
+ 'description': 'md5:c1272f0245537812d4e36419c207b67d',
+ 'duration': 2668,
+ 'upload_date': '20141208',
+ },
+ }],
+ 'skip': 'Blocked outside of Austria / Germany',
+ }, {
+ 'url': 'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256',
+ 'info_dict': {
+ 'id': '7982259',
+ 'ext': 'mp4',
+ 'title': 'Best of Ingrid Thurnher',
+ 'upload_date': '20140527',
+ 'description': 'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im Jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".',
+ },
+ 'params': {
+ 'skip_download': True, # rtsp downloads
+ },
+ 'skip': 'Blocked outside of Austria / Germany',
+ }, {
+ 'url': 'http://tvthek.orf.at/topic/Fluechtlingskrise/10463081/Heimat-Fremde-Heimat/13879132/Senioren-betreuen-Migrantenkinder/13879141',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://tvthek.orf.at/profile/Universum/35429',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+
+ data_jsb = self._parse_json(
+ self._search_regex(
+ r'<div[^>]+class=(["\']).*?VideoPlaylist.*?\1[^>]+data-jsb=(["\'])(?P<json>.+?)\2',
+ webpage, 'playlist', group='json'),
+ playlist_id, transform_source=unescapeHTML)['playlist']['videos']
+
+ entries = []
+ for sd in data_jsb:
+ video_id, title = sd.get('id'), sd.get('title')
+ if not video_id or not title:
+ continue
+ video_id = compat_str(video_id)
+ formats = []
+ for fd in sd['sources']:
+ src = url_or_none(fd.get('src'))
+ if not src:
+ continue
+ format_id_list = []
+ for key in ('delivery', 'quality', 'quality_string'):
+ value = fd.get(key)
+ if value:
+ format_id_list.append(value)
+ format_id = '-'.join(format_id_list)
+ ext = determine_ext(src)
+ if ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src, video_id, 'mp4', m3u8_id=format_id, fatal=False)
+ if any('/geoprotection' in f['url'] for f in m3u8_formats):
+ self.raise_geo_restricted()
+ formats.extend(m3u8_formats)
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ src, video_id, f4m_id=format_id, fatal=False))
+ else:
+ formats.append({
+ 'format_id': format_id,
+ 'url': src,
+ 'protocol': fd.get('protocol'),
+ })
+
+ # Check for geoblocking.
+ # There is an is_geoprotection property, but it is always false.
+ geo_str = sd.get('geoprotection_string')
+ if geo_str:
+ try:
+ http_url = next(
+ f['url']
+ for f in formats
+ if re.match(r'^https?://.*\.mp4$', f['url']))
+ except StopIteration:
+ pass
+ else:
+ req = HEADRequest(http_url)
+ self._request_webpage(
+ req, video_id,
+ note='Testing for geoblocking',
+ errnote=((
+ 'This video seems to be blocked outside of %s. '
+ 'You may want to try the streaming-* formats.')
+ % geo_str),
+ fatal=False)
+
+ self._check_formats(formats, video_id)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for sub in sd.get('subtitles', []):
+ sub_src = sub.get('src')
+ if not sub_src:
+ continue
+ subtitles.setdefault(sub.get('lang', 'de-AT'), []).append({
+ 'url': sub_src,
+ })
+
+ upload_date = unified_strdate(sd.get('created_date'))
+ entries.append({
+ '_type': 'video',
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'description': sd.get('description'),
+ 'duration': int_or_none(sd.get('duration_in_seconds')),
+ 'upload_date': upload_date,
+ 'thumbnail': sd.get('image_full_url'),
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'id': playlist_id,
+ }
+
+
+class ORFRadioIE(InfoExtractor):
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ show_date = mobj.group('date')
+ show_id = mobj.group('show')
+
+ data = self._download_json(
+ 'http://audioapi.orf.at/%s/api/json/current/broadcast/%s/%s'
+ % (self._API_STATION, show_id, show_date), show_id)
+
+ entries = []
+ for info in data['streams']:
+ loop_stream_id = str_or_none(info.get('loopStreamId'))
+ if not loop_stream_id:
+ continue
+ title = str_or_none(data.get('title'))
+ if not title:
+ continue
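+ # start/end come in milliseconds; scale=1000 converts them to the
+ # epoch seconds used for timestamp/duration.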
+ start = int_or_none(info.get('start'), scale=1000)
+ end = int_or_none(info.get('end'), scale=1000)
+ duration = end - start if end and start else None
+ entries.append({
+ 'id': loop_stream_id.replace('.mp3', ''),
+ 'url': 'http://loopstream01.apa.at/?channel=%s&id=%s' % (self._LOOP_STATION, loop_stream_id),
+ 'title': title,
+ 'description': clean_html(data.get('subtitle')),
+ 'duration': duration,
+ 'timestamp': start,
+ 'ext': 'mp3',
+ 'series': data.get('programTitle'),
+ })
+
+ return {
+ '_type': 'playlist',
+ 'id': show_id,
+ 'title': data.get('title'),
+ 'description': clean_html(data.get('subtitle')),
+ 'entries': entries,
+ }
+
+
+class ORFFM4IE(ORFRadioIE):
+ IE_NAME = 'orf:fm4'
+ IE_DESC = 'radio FM4'
+ _VALID_URL = r'https?://(?P<station>fm4)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>4\w+)'
+ _API_STATION = 'fm4'
+ _LOOP_STATION = 'fm4'
+
+ _TEST = {
+ 'url': 'http://fm4.orf.at/player/20170107/4CC',
+ 'md5': '2b0be47375432a7ef104453432a19212',
+ 'info_dict': {
+ 'id': '2017-01-07_2100_tl_54_7DaysSat18_31295',
+ 'ext': 'mp3',
+ 'title': 'Solid Steel Radioshow',
+ 'description': 'Die Mixshow von Coldcut und Ninja Tune.',
+ 'duration': 3599,
+ 'timestamp': 1483819257,
+ 'upload_date': '20170107',
+ },
+ 'skip': 'Shows from ORF radios are only available for 7 days.',
+ 'only_matching': True,
+ }
+
+
+class ORFNOEIE(ORFRadioIE):
+ IE_NAME = 'orf:noe'
+ IE_DESC = 'Radio Niederösterreich'
+ _VALID_URL = r'https?://(?P<station>noe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'noe'
+ _LOOP_STATION = 'oe2n'
+
+ _TEST = {
+ 'url': 'https://noe.orf.at/player/20200423/NGM',
+ 'only_matching': True,
+ }
+
+
+class ORFWIEIE(ORFRadioIE):
+ IE_NAME = 'orf:wien'
+ IE_DESC = 'Radio Wien'
+ _VALID_URL = r'https?://(?P<station>wien)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'wie'
+ _LOOP_STATION = 'oe2w'
+
+ _TEST = {
+ 'url': 'https://wien.orf.at/player/20200423/WGUM',
+ 'only_matching': True,
+ }
+
+
+class ORFBGLIE(ORFRadioIE):
+ IE_NAME = 'orf:burgenland'
+ IE_DESC = 'Radio Burgenland'
+ _VALID_URL = r'https?://(?P<station>burgenland)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'bgl'
+ _LOOP_STATION = 'oe2b'
+
+ _TEST = {
+ 'url': 'https://burgenland.orf.at/player/20200423/BGM',
+ 'only_matching': True,
+ }
+
+
+class ORFOOEIE(ORFRadioIE):
+ IE_NAME = 'orf:oberoesterreich'
+ IE_DESC = 'Radio Oberösterreich'
+ _VALID_URL = r'https?://(?P<station>ooe)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'ooe'
+ _LOOP_STATION = 'oe2o'
+
+ _TEST = {
+ 'url': 'https://ooe.orf.at/player/20200423/OGMO',
+ 'only_matching': True,
+ }
+
+
+class ORFSTMIE(ORFRadioIE):
+ IE_NAME = 'orf:steiermark'
+ IE_DESC = 'Radio Steiermark'
+ _VALID_URL = r'https?://(?P<station>steiermark)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'stm'
+ _LOOP_STATION = 'oe2st'
+
+ _TEST = {
+ 'url': 'https://steiermark.orf.at/player/20200423/STGMS',
+ 'only_matching': True,
+ }
+
+
+class ORFKTNIE(ORFRadioIE):
+ IE_NAME = 'orf:kaernten'
+ IE_DESC = 'Radio Kärnten'
+ _VALID_URL = r'https?://(?P<station>kaernten)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'ktn'
+ _LOOP_STATION = 'oe2k'
+
+ _TEST = {
+ 'url': 'https://kaernten.orf.at/player/20200423/KGUMO',
+ 'only_matching': True,
+ }
+
+
+class ORFSBGIE(ORFRadioIE):
+ IE_NAME = 'orf:salzburg'
+ IE_DESC = 'Radio Salzburg'
+ _VALID_URL = r'https?://(?P<station>salzburg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'sbg'
+ _LOOP_STATION = 'oe2s'
+
+ _TEST = {
+ 'url': 'https://salzburg.orf.at/player/20200423/SGUM',
+ 'only_matching': True,
+ }
+
+
+class ORFTIRIE(ORFRadioIE):
+ IE_NAME = 'orf:tirol'
+ IE_DESC = 'Radio Tirol'
+ _VALID_URL = r'https?://(?P<station>tirol)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'tir'
+ _LOOP_STATION = 'oe2t'
+
+ _TEST = {
+ 'url': 'https://tirol.orf.at/player/20200423/TGUMO',
+ 'only_matching': True,
+ }
+
+
+class ORFVBGIE(ORFRadioIE):
+ IE_NAME = 'orf:vorarlberg'
+ IE_DESC = 'Radio Vorarlberg'
+ _VALID_URL = r'https?://(?P<station>vorarlberg)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'vbg'
+ _LOOP_STATION = 'oe2v'
+
+ _TEST = {
+ 'url': 'https://vorarlberg.orf.at/player/20200423/VGUM',
+ 'only_matching': True,
+ }
+
+
+class ORFOE3IE(ORFRadioIE):
+ IE_NAME = 'orf:oe3'
+ IE_DESC = 'Radio Österreich 3'
+ _VALID_URL = r'https?://(?P<station>oe3)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'oe3'
+ _LOOP_STATION = 'oe3'
+
+ _TEST = {
+ 'url': 'https://oe3.orf.at/player/20200424/3WEK',
+ 'only_matching': True,
+ }
+
+
+class ORFOE1IE(ORFRadioIE):
+ IE_NAME = 'orf:oe1'
+ IE_DESC = 'Radio Österreich 1'
+ _VALID_URL = r'https?://(?P<station>oe1)\.orf\.at/player/(?P<date>[0-9]+)/(?P<show>\w+)'
+ _API_STATION = 'oe1'
+ _LOOP_STATION = 'oe1'
+
+ _TEST = {
+ 'url': 'http://oe1.orf.at/player/20170108/456544',
+ 'md5': '34d8a6e67ea888293741c86a099b745b',
+ 'info_dict': {
+ 'id': '2017-01-08_0759_tl_51_7DaysSun6_256141',
+ 'ext': 'mp3',
+ 'title': 'Morgenjournal',
+ 'duration': 609,
+ 'timestamp': 1483858796,
+ 'upload_date': '20170108',
+ },
+ 'skip': 'Shows from ORF radios are only available for 7 days.'
+ }
+
+
+class ORFIPTVIE(InfoExtractor):
+ IE_NAME = 'orf:iptv'
+ IE_DESC = 'iptv.ORF.at'
+ _VALID_URL = r'https?://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://iptv.orf.at/stories/2275236/',
+ 'md5': 'c8b22af4718a4b4af58342529453e3e5',
+ 'info_dict': {
+ 'id': '350612',
+ 'ext': 'flv',
+ 'title': 'Weitere Evakuierungen um Vulkan Calbuco',
+ 'description': 'md5:d689c959bdbcf04efeddedbf2299d633',
+ 'duration': 68.197,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20150425',
+ },
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://iptv.orf.at/stories/%s' % story_id, story_id)
+
+ video_id = self._search_regex(
+ r'data-video(?:id)?="(\d+)"', webpage, 'video id')
+
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['default']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
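+ # The load balancer responds with JSONP mapping format ids to redirect
+ # URLs; the URL extension decides how each one is handled below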
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' - iptv.ORF.at')
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ }
+
+
+class ORFFM4StoryIE(InfoExtractor):
+ IE_NAME = 'orf:fm4:story'
+ IE_DESC = 'fm4.orf.at stories'
+ _VALID_URL = r'https?://fm4\.orf\.at/stories/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://fm4.orf.at/stories/2865738/',
+ 'playlist': [{
+ 'md5': 'e1c2c706c45c7b34cf478bbf409907ca',
+ 'info_dict': {
+ 'id': '547792',
+ 'ext': 'flv',
+ 'title': 'Manu Delago und Inner Tongue live',
+ 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+ 'duration': 1748.52,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170913',
+ },
+ }, {
+ 'md5': 'c6dd2179731f86f4f55a7b49899d515f',
+ 'info_dict': {
+ 'id': '547798',
+ 'ext': 'flv',
+ 'title': 'Manu Delago und Inner Tongue live (2)',
+ 'duration': 1504.08,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'upload_date': '20170913',
+ 'description': 'Manu Delago und Inner Tongue haben bei der FM4 Soundpark Session live alles gegeben. Hier gibt es Fotos und die gesamte Session als Video.',
+ },
+ }],
+ }
+
+ def _real_extract(self, url):
+ story_id = self._match_id(url)
+ webpage = self._download_webpage(url, story_id)
+
+ entries = []
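+ # Collect every embedded video id on the story page, in page order and
+ # without duplicates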
+ all_ids = orderedSet(re.findall(r'data-video(?:id)?="(\d+)"', webpage))
+ for idx, video_id in enumerate(all_ids):
+ data = self._download_json(
+ 'http://bits.orf.at/filehandler/static-api/json/current/data.json?file=%s' % video_id,
+ video_id)[0]
+
+ duration = float_or_none(data['duration'], 1000)
+
+ video = data['sources']['q8c']
+ load_balancer_url = video['loadBalancerUrl']
+ abr = int_or_none(video.get('audioBitrate'))
+ vbr = int_or_none(video.get('bitrate'))
+ fps = int_or_none(video.get('videoFps'))
+ width = int_or_none(video.get('videoWidth'))
+ height = int_or_none(video.get('videoHeight'))
+ thumbnail = video.get('preview')
+
+ rendition = self._download_json(
+ load_balancer_url, video_id, transform_source=strip_jsonp)
+
+ f = {
+ 'abr': abr,
+ 'vbr': vbr,
+ 'fps': fps,
+ 'width': width,
+ 'height': height,
+ }
+
+ formats = []
+ for format_id, format_url in rendition['redirect'].items():
+ if format_id == 'rtmp':
+ ff = f.copy()
+ ff.update({
+ 'url': format_url,
+ 'format_id': format_id,
+ })
+ formats.append(ff)
+ elif determine_ext(format_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ format_url, video_id, f4m_id=format_id))
+ elif determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id=format_id))
+ else:
+ continue
+ self._sort_formats(formats)
+
+ title = remove_end(self._og_search_title(webpage), ' - fm4.ORF.at')
+ if idx >= 1:
+ # Titles are duplicates, make them unique
+ title += ' (' + str(idx + 1) + ')'
+ description = self._og_search_description(webpage)
+ upload_date = unified_strdate(self._html_search_meta(
+ 'dc.date', webpage, 'upload date'))
+
+ entries.append({
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'formats': formats,
+ })
+
+ return self.playlist_result(entries)
diff --git a/youtube_dl/extractor/outsidetv.py b/youtube_dlc/extractor/outsidetv.py
index c5333b08c..c5333b08c 100644
--- a/youtube_dl/extractor/outsidetv.py
+++ b/youtube_dlc/extractor/outsidetv.py
diff --git a/youtube_dl/extractor/packtpub.py b/youtube_dlc/extractor/packtpub.py
index 11ad3b3b8..11ad3b3b8 100644
--- a/youtube_dl/extractor/packtpub.py
+++ b/youtube_dlc/extractor/packtpub.py
diff --git a/youtube_dl/extractor/pandoratv.py b/youtube_dlc/extractor/pandoratv.py
index 538738c09..538738c09 100644
--- a/youtube_dl/extractor/pandoratv.py
+++ b/youtube_dlc/extractor/pandoratv.py
diff --git a/youtube_dl/extractor/parliamentliveuk.py b/youtube_dlc/extractor/parliamentliveuk.py
index bdd5ff565..bdd5ff565 100644
--- a/youtube_dl/extractor/parliamentliveuk.py
+++ b/youtube_dlc/extractor/parliamentliveuk.py
diff --git a/youtube_dlc/extractor/patreon.py b/youtube_dlc/extractor/patreon.py
new file mode 100644
index 000000000..761a4b1de
--- /dev/null
+++ b/youtube_dlc/extractor/patreon.py
@@ -0,0 +1,156 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ KNOWN_EXTENSIONS,
+ mimetype2ext,
+ parse_iso8601,
+ str_or_none,
+ try_get,
+)
+
+
+class PatreonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?patreon\.com/(?:creation\?hid=|posts/(?:[\w-]+-)?)(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.patreon.com/creation?hid=743933',
+ 'md5': 'e25505eec1053a6e6813b8ed369875cc',
+ 'info_dict': {
+ 'id': '743933',
+ 'ext': 'mp3',
+ 'title': 'Episode 166: David Smalley of Dogma Debate',
+ 'description': 'md5:713b08b772cd6271b9f3906683cfacdf',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
+ 'timestamp': 1406473987,
+ 'upload_date': '20140727',
+ 'uploader_id': '87145',
+ },
+ }, {
+ 'url': 'http://www.patreon.com/creation?hid=754133',
+ 'md5': '3eb09345bf44bf60451b8b0b81759d0a',
+ 'info_dict': {
+ 'id': '754133',
+ 'ext': 'mp3',
+ 'title': 'CD 167 Extra',
+ 'uploader': 'Cognitive Dissonance Podcast',
+ 'thumbnail': 're:^https?://.*$',
+ },
+ 'skip': 'Patron-only content',
+ }, {
+ 'url': 'https://www.patreon.com/creation?hid=1682498',
+ 'info_dict': {
+ 'id': 'SU4fj_aEMVw',
+ 'ext': 'mp4',
+ 'title': 'I\'m on Patreon!',
+ 'uploader': 'TraciJHines',
+ 'thumbnail': 're:^https?://.*$',
+ 'upload_date': '20150211',
+ 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
+ 'uploader_id': 'TraciJHines',
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://www.patreon.com/posts/episode-166-of-743933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.patreon.com/posts/743933',
+ 'only_matching': True,
+ }]
+
+ # Currently Patreon exposes the download URL via hidden CSS, so login is
+ # not needed. Keeping this commented out for when this inevitably changes.
+ '''
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'redirectUrl': 'http://www.patreon.com/',
+ 'email': username,
+ 'password': password,
+ }
+
+ request = sanitized_Request(
+ 'https://www.patreon.com/processLogin',
+ compat_urllib_parse_urlencode(login_form).encode('utf-8')
+ )
+ login_page = self._download_webpage(request, None, note='Logging in')
+
+ if re.search(r'onLoginFailed', login_page):
+ raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
+
+ def _real_initialize(self):
+ self._login()
+ '''
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ post = self._download_json(
+ 'https://www.patreon.com/api/posts/' + video_id, video_id, query={
+ 'fields[media]': 'download_url,mimetype,size_bytes',
+ 'fields[post]': 'comment_count,content,embed,image,like_count,post_file,published_at,title',
+ 'fields[user]': 'full_name,url',
+ 'json-api-use-default-includes': 'false',
+ 'include': 'media,user',
+ })
+ attributes = post['data']['attributes']
+ title = attributes['title'].strip()
+ image = attributes.get('image') or {}
+ info = {
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(attributes.get('content')),
+ 'thumbnail': image.get('large_url') or image.get('url'),
+ 'timestamp': parse_iso8601(attributes.get('published_at')),
+ 'like_count': int_or_none(attributes.get('like_count')),
+ 'comment_count': int_or_none(attributes.get('comment_count')),
+ }
+
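+ # 'included' carries the media and user resources requested via the
+ # 'include' query parameter above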
+ for i in post.get('included', []):
+ i_type = i.get('type')
+ if i_type == 'media':
+ media_attributes = i.get('attributes') or {}
+ download_url = media_attributes.get('download_url')
+ ext = mimetype2ext(media_attributes.get('mimetype'))
+ if download_url and ext in KNOWN_EXTENSIONS:
+ info.update({
+ 'ext': ext,
+ 'filesize': int_or_none(media_attributes.get('size_bytes')),
+ 'url': download_url,
+ })
+ elif i_type == 'user':
+ user_attributes = i.get('attributes')
+ if user_attributes:
+ info.update({
+ 'uploader': user_attributes.get('full_name'),
+ 'uploader_id': str_or_none(i.get('id')),
+ 'uploader_url': user_attributes.get('url'),
+ })
+
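+ # No direct media download URL was found: fall back to the embed URL,
+ # then to the attached post file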
+ if not info.get('url'):
+ embed_url = try_get(attributes, lambda x: x['embed']['url'])
+ if embed_url:
+ info.update({
+ '_type': 'url',
+ 'url': embed_url,
+ })
+
+ if not info.get('url'):
+ post_file = attributes['post_file']
+ ext = determine_ext(post_file.get('name'))
+ if ext in KNOWN_EXTENSIONS:
+ info.update({
+ 'ext': ext,
+ 'url': post_file['url'],
+ })
+
+ return info
diff --git a/youtube_dl/extractor/pbs.py b/youtube_dlc/extractor/pbs.py
index 4dbe661be..4dbe661be 100644
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dlc/extractor/pbs.py
diff --git a/youtube_dl/extractor/pearvideo.py b/youtube_dlc/extractor/pearvideo.py
index 1d777221c..1d777221c 100644
--- a/youtube_dl/extractor/pearvideo.py
+++ b/youtube_dlc/extractor/pearvideo.py
diff --git a/youtube_dlc/extractor/peertube.py b/youtube_dlc/extractor/peertube.py
new file mode 100644
index 000000000..48fb95416
--- /dev/null
+++ b/youtube_dlc/extractor/peertube.py
@@ -0,0 +1,600 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ parse_resolution,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
+)
+
+
+class PeerTubeIE(InfoExtractor):
+ _INSTANCES_RE = r'''(?:
+ # Taken from https://instances.joinpeertube.org/instances
+ peertube\.rainbowswingers\.net|
+ tube\.stanisic\.nl|
+ peer\.suiri\.us|
+ medias\.libox\.fr|
+ videomensoif\.ynh\.fr|
+ peertube\.travelpandas\.eu|
+ peertube\.rachetjay\.fr|
+ peertube\.montecsys\.fr|
+ tube\.eskuero\.me|
+ peer\.tube|
+ peertube\.umeahackerspace\.se|
+ tube\.nx-pod\.de|
+ video\.monsieurbidouille\.fr|
+ tube\.openalgeria\.org|
+ vid\.lelux\.fi|
+ video\.anormallostpod\.ovh|
+ tube\.crapaud-fou\.org|
+ peertube\.stemy\.me|
+ lostpod\.space|
+ exode\.me|
+ peertube\.snargol\.com|
+ vis\.ion\.ovh|
+ videosdulib\.re|
+ v\.mbius\.io|
+ videos\.judrey\.eu|
+ peertube\.osureplayviewer\.xyz|
+ peertube\.mathieufamily\.ovh|
+ www\.videos-libr\.es|
+ fightforinfo\.com|
+ peertube\.fediverse\.ru|
+ peertube\.oiseauroch\.fr|
+ video\.nesven\.eu|
+ v\.bearvideo\.win|
+ video\.qoto\.org|
+ justporn\.cc|
+ video\.vny\.fr|
+ peervideo\.club|
+ tube\.taker\.fr|
+ peertube\.chantierlibre\.org|
+ tube\.ipfixe\.info|
+ tube\.kicou\.info|
+ tube\.dodsorf\.as|
+ videobit\.cc|
+ video\.yukari\.moe|
+ videos\.elbinario\.net|
+ hkvideo\.live|
+ pt\.tux\.tf|
+ www\.hkvideo\.live|
+ FIGHTFORINFO\.com|
+ pt\.765racing\.com|
+ peertube\.gnumeria\.eu\.org|
+ nordenmedia\.com|
+ peertube\.co\.uk|
+ tube\.darfweb\.eu|
+ tube\.kalah-france\.org|
+ 0ch\.in|
+ vod\.mochi\.academy|
+ film\.node9\.org|
+ peertube\.hatthieves\.es|
+ video\.fitchfamily\.org|
+ peertube\.ddns\.net|
+ video\.ifuncle\.kr|
+ video\.fdlibre\.eu|
+ tube\.22decembre\.eu|
+ peertube\.harmoniescreatives\.com|
+ tube\.fabrigli\.fr|
+ video\.thedwyers\.co|
+ video\.bruitbruit\.com|
+ peertube\.foxfam\.club|
+ peer\.philoxweb\.be|
+ videos\.bugs\.social|
+ peertube\.malbert\.xyz|
+ peertube\.bilange\.ca|
+ libretube\.net|
+ diytelevision\.com|
+ peertube\.fedilab\.app|
+ libre\.video|
+ video\.mstddntfdn\.online|
+ us\.tv|
+ peertube\.sl-network\.fr|
+ peertube\.dynlinux\.io|
+ peertube\.david\.durieux\.family|
+ peertube\.linuxrocks\.online|
+ peerwatch\.xyz|
+ v\.kretschmann\.social|
+ tube\.otter\.sh|
+ yt\.is\.nota\.live|
+ tube\.dragonpsi\.xyz|
+ peertube\.boneheadmedia\.com|
+ videos\.funkwhale\.audio|
+ watch\.44con\.com|
+ peertube\.gcaillaut\.fr|
+ peertube\.icu|
+ pony\.tube|
+ spacepub\.space|
+ tube\.stbr\.io|
+ v\.mom-gay\.faith|
+ tube\.port0\.xyz|
+ peertube\.simounet\.net|
+ play\.jergefelt\.se|
+ peertube\.zeteo\.me|
+ tube\.danq\.me|
+ peertube\.kerenon\.com|
+ tube\.fab-l3\.org|
+ tube\.calculate\.social|
+ peertube\.mckillop\.org|
+ tube\.netzspielplatz\.de|
+ vod\.ksite\.de|
+ peertube\.laas\.fr|
+ tube\.govital\.net|
+ peertube\.stephenson\.cc|
+ bistule\.nohost\.me|
+ peertube\.kajalinifi\.de|
+ video\.ploud\.jp|
+ video\.omniatv\.com|
+ peertube\.ffs2play\.fr|
+ peertube\.leboulaire\.ovh|
+ peertube\.tronic-studio\.com|
+ peertube\.public\.cat|
+ peertube\.metalbanana\.net|
+ video\.1000i100\.fr|
+ peertube\.alter-nativ-voll\.de|
+ tube\.pasa\.tf|
+ tube\.worldofhauru\.xyz|
+ pt\.kamp\.site|
+ peertube\.teleassist\.fr|
+ videos\.mleduc\.xyz|
+ conf\.tube|
+ media\.privacyinternational\.org|
+ pt\.forty-two\.nl|
+ video\.halle-leaks\.de|
+ video\.grosskopfgames\.de|
+ peertube\.schaeferit\.de|
+ peertube\.jackbot\.fr|
+ tube\.extinctionrebellion\.fr|
+ peertube\.f-si\.org|
+ video\.subak\.ovh|
+ videos\.koweb\.fr|
+ peertube\.zergy\.net|
+ peertube\.roflcopter\.fr|
+ peertube\.floss-marketing-school\.com|
+ vloggers\.social|
+ peertube\.iriseden\.eu|
+ videos\.ubuntu-paris\.org|
+ peertube\.mastodon\.host|
+ armstube\.com|
+ peertube\.s2s\.video|
+ peertube\.lol|
+ tube\.open-plug\.eu|
+ open\.tube|
+ peertube\.ch|
+ peertube\.normandie-libre\.fr|
+ peertube\.slat\.org|
+ video\.lacaveatonton\.ovh|
+ peertube\.uno|
+ peertube\.servebeer\.com|
+ peertube\.fedi\.quebec|
+ tube\.h3z\.jp|
+ tube\.plus200\.com|
+ peertube\.eric\.ovh|
+ tube\.metadocs\.cc|
+ tube\.unmondemeilleur\.eu|
+ gouttedeau\.space|
+ video\.antirep\.net|
+ nrop\.cant\.at|
+ tube\.ksl-bmx\.de|
+ tube\.plaf\.fr|
+ tube\.tchncs\.de|
+ video\.devinberg\.com|
+ hitchtube\.fr|
+ peertube\.kosebamse\.com|
+ yunopeertube\.myddns\.me|
+ peertube\.varney\.fr|
+ peertube\.anon-kenkai\.com|
+ tube\.maiti\.info|
+ tubee\.fr|
+ videos\.dinofly\.com|
+ toobnix\.org|
+ videotape\.me|
+ voca\.tube|
+ video\.heromuster\.com|
+ video\.lemediatv\.fr|
+ video\.up\.edu\.ph|
+ balafon\.video|
+ video\.ivel\.fr|
+ thickrips\.cloud|
+ pt\.laurentkruger\.fr|
+ video\.monarch-pass\.net|
+ peertube\.artica\.center|
+ video\.alternanet\.fr|
+ indymotion\.fr|
+ fanvid\.stopthatimp\.net|
+ video\.farci\.org|
+ v\.lesterpig\.com|
+ video\.okaris\.de|
+ tube\.pawelko\.net|
+ peertube\.mablr\.org|
+ tube\.fede\.re|
+ pytu\.be|
+ evertron\.tv|
+ devtube\.dev-wiki\.de|
+ raptube\.antipub\.org|
+ video\.selea\.se|
+ peertube\.mygaia\.org|
+ video\.oh14\.de|
+ peertube\.livingutopia\.org|
+ peertube\.the-penguin\.de|
+ tube\.thechangebook\.org|
+ tube\.anjara\.eu|
+ pt\.pube\.tk|
+ video\.samedi\.pm|
+ mplayer\.demouliere\.eu|
+ widemus\.de|
+ peertube\.me|
+ peertube\.zapashcanon\.fr|
+ video\.latavernedejohnjohn\.fr|
+ peertube\.pcservice46\.fr|
+ peertube\.mazzonetto\.eu|
+ video\.irem\.univ-paris-diderot\.fr|
+ video\.livecchi\.cloud|
+ alttube\.fr|
+ video\.coop\.tools|
+ video\.cabane-libre\.org|
+ peertube\.openstreetmap\.fr|
+ videos\.alolise\.org|
+ irrsinn\.video|
+ video\.antopie\.org|
+ scitech\.video|
+ tube2\.nemsia\.org|
+ video\.amic37\.fr|
+ peertube\.freeforge\.eu|
+ video\.arbitrarion\.com|
+ video\.datsemultimedia\.com|
+ stoptrackingus\.tv|
+ peertube\.ricostrongxxx\.com|
+ docker\.videos\.lecygnenoir\.info|
+ peertube\.togart\.de|
+ tube\.postblue\.info|
+ videos\.domainepublic\.net|
+ peertube\.cyber-tribal\.com|
+ video\.gresille\.org|
+ peertube\.dsmouse\.net|
+ cinema\.yunohost\.support|
+ tube\.theocevaer\.fr|
+ repro\.video|
+ tube\.4aem\.com|
+ quaziinc\.com|
+ peertube\.metawurst\.space|
+ videos\.wakapo\.com|
+ video\.ploud\.fr|
+ video\.freeradical\.zone|
+ tube\.valinor\.fr|
+ refuznik\.video|
+ pt\.kircheneuenburg\.de|
+ peertube\.asrun\.eu|
+ peertube\.lagob\.fr|
+ videos\.side-ways\.net|
+ 91video\.online|
+ video\.valme\.io|
+ video\.taboulisme\.com|
+ videos-libr\.es|
+ tv\.mooh\.fr|
+ nuage\.acostey\.fr|
+ video\.monsieur-a\.fr|
+ peertube\.librelois\.fr|
+ videos\.pair2jeux\.tube|
+ videos\.pueseso\.club|
+ peer\.mathdacloud\.ovh|
+ media\.assassinate-you\.net|
+ vidcommons\.org|
+ ptube\.rousset\.nom\.fr|
+ tube\.cyano\.at|
+ videos\.squat\.net|
+ video\.iphodase\.fr|
+ peertube\.makotoworkshop\.org|
+ peertube\.serveur\.slv-valbonne\.fr|
+ vault\.mle\.party|
+ hostyour\.tv|
+ videos\.hack2g2\.fr|
+ libre\.tube|
+ pire\.artisanlogiciel\.net|
+ videos\.numerique-en-commun\.fr|
+ video\.netsyms\.com|
+ video\.die-partei\.social|
+ video\.writeas\.org|
+ peertube\.swarm\.solvingmaz\.es|
+ tube\.pericoloso\.ovh|
+ watching\.cypherpunk\.observer|
+ videos\.adhocmusic\.com|
+ tube\.rfc1149\.net|
+ peertube\.librelabucm\.org|
+ videos\.numericoop\.fr|
+ peertube\.koehn\.com|
+ peertube\.anarchmusicall\.net|
+ tube\.kampftoast\.de|
+ vid\.y-y\.li|
+ peertube\.xtenz\.xyz|
+ diode\.zone|
+ tube\.egf\.mn|
+ peertube\.nomagic\.uk|
+ visionon\.tv|
+ videos\.koumoul\.com|
+ video\.rastapuls\.com|
+ video\.mantlepro\.com|
+ video\.deadsuperhero\.com|
+ peertube\.musicstudio\.pro|
+ peertube\.we-keys\.fr|
+ artitube\.artifaille\.fr|
+ peertube\.ethernia\.net|
+ tube\.midov\.pl|
+ peertube\.fr|
+ watch\.snoot\.tube|
+ peertube\.donnadieu\.fr|
+ argos\.aquilenet\.fr|
+ tube\.nemsia\.org|
+ tube\.bruniau\.net|
+ videos\.darckoune\.moe|
+ tube\.traydent\.info|
+ dev\.videos\.lecygnenoir\.info|
+ peertube\.nayya\.org|
+ peertube\.live|
+ peertube\.mofgao\.space|
+ video\.lequerrec\.eu|
+ peertube\.amicale\.net|
+ aperi\.tube|
+ tube\.ac-lyon\.fr|
+ video\.lw1\.at|
+ www\.yiny\.org|
+ videos\.pofilo\.fr|
+ tube\.lou\.lt|
+ choob\.h\.etbus\.ch|
+ tube\.hoga\.fr|
+ peertube\.heberge\.fr|
+ video\.obermui\.de|
+ videos\.cloudfrancois\.fr|
+ betamax\.video|
+ video\.typica\.us|
+ tube\.piweb\.be|
+ video\.blender\.org|
+ peertube\.cat|
+ tube\.kdy\.ch|
+ pe\.ertu\.be|
+ peertube\.social|
+ videos\.lescommuns\.org|
+ tv\.datamol\.org|
+ videonaute\.fr|
+ dialup\.express|
+ peertube\.nogafa\.org|
+ megatube\.lilomoino\.fr|
+ peertube\.tamanoir\.foucry\.net|
+ peertube\.devosi\.org|
+ peertube\.1312\.media|
+ tube\.bootlicker\.party|
+ skeptikon\.fr|
+ video\.blueline\.mg|
+ tube\.homecomputing\.fr|
+ tube\.ouahpiti\.info|
+ video\.tedomum\.net|
+ video\.g3l\.org|
+ fontube\.fr|
+ peertube\.gaialabs\.ch|
+ tube\.kher\.nl|
+ peertube\.qtg\.fr|
+ video\.migennes\.net|
+ tube\.p2p\.legal|
+ troll\.tv|
+ videos\.iut-orsay\.fr|
+ peertube\.solidev\.net|
+ videos\.cemea\.org|
+ video\.passageenseine\.fr|
+ videos\.festivalparminous\.org|
+ peertube\.touhoppai\.moe|
+ sikke\.fi|
+ peer\.hostux\.social|
+ share\.tube|
+ peertube\.walkingmountains\.fr|
+ videos\.benpro\.fr|
+ peertube\.parleur\.net|
+ peertube\.heraut\.eu|
+ tube\.aquilenet\.fr|
+ peertube\.gegeweb\.eu|
+ framatube\.org|
+ thinkerview\.video|
+ tube\.conferences-gesticulees\.net|
+ peertube\.datagueule\.tv|
+ video\.lqdn\.fr|
+ tube\.mochi\.academy|
+ media\.zat\.im|
+ video\.colibris-outilslibres\.org|
+ tube\.svnet\.fr|
+ peertube\.video|
+ peertube3\.cpy\.re|
+ peertube2\.cpy\.re|
+ videos\.tcit\.fr|
+ peertube\.cpy\.re
+ )'''
+ _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+ _API_BASE = 'https://%s/api/v1/videos/%s/%s'
+ _VALID_URL = r'''(?x)
+ (?:
+ peertube:(?P<host>[^:]+):|
+ https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
+ )
+ (?P<id>%s)
+ ''' % (_INSTANCES_RE, _UUID_RE)
+ _TESTS = [{
+ 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
+ 'md5': '9bed8c0137913e17b86334e5885aacff',
+ 'info_dict': {
+ 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
+ 'ext': 'mp4',
+ 'title': 'What is PeerTube?',
+ 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
+ 'thumbnail': r're:https?://.*\.(?:jpg|png)',
+ 'timestamp': 1538391166,
+ 'upload_date': '20181001',
+ 'uploader': 'Framasoft',
+ 'uploader_id': '3',
+ 'uploader_url': 'https://framatube.org/accounts/framasoft',
+ 'channel': 'Les vidéos de Framasoft',
+ 'channel_id': '2',
+ 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
+ 'language': 'en',
+ 'license': 'Attribution - Share Alike',
+ 'duration': 113,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'tags': ['framasoft', 'peertube'],
+ 'categories': ['Science & Technology'],
+ }
+ }, {
+ 'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
+ 'only_matching': True,
+ }, {
+ # nsfw
+ 'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
+ 'only_matching': True,
+ }, {
+ 'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_peertube_url(webpage, source_url):
+ mobj = re.match(
+ r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
+ % PeerTubeIE._UUID_RE, source_url)
+ if mobj and any(p in webpage for p in (
+ '<title>PeerTube<',
+ 'There will be other non JS-based clients to access PeerTube',
+ '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
+ return 'peertube:%s:%s' % mobj.group('host', 'id')
+
+ @staticmethod
+ def _extract_urls(webpage, source_url):
+ entries = re.findall(
+ r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
+ % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
+ if not entries:
+ peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
+ if peertube_url:
+ entries = [peertube_url]
+ return entries
+
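+ # All PeerTube instances expose the same REST API:
+ # https://<host>/api/v1/videos/<id>/<path>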
+ def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
+ return self._download_json(
+ self._API_BASE % (host, video_id, path), video_id,
+ note=note, errnote=errnote, fatal=fatal)
+
+ def _get_subtitles(self, host, video_id):
+ captions = self._call_api(
+ host, video_id, 'captions', note='Downloading captions JSON',
+ fatal=False)
+ if not isinstance(captions, dict):
+ return
+ data = captions.get('data')
+ if not isinstance(data, list):
+ return
+ subtitles = {}
+ for e in data:
+ language_id = try_get(e, lambda x: x['language']['id'], compat_str)
+ caption_url = urljoin('https://%s' % host, e.get('captionPath'))
+ if not caption_url:
+ continue
+ subtitles.setdefault(language_id or 'en', []).append({
+ 'url': caption_url,
+ })
+ return subtitles
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host') or mobj.group('host_2')
+ video_id = mobj.group('id')
+
+ video = self._call_api(
+ host, video_id, '', note='Downloading video JSON')
+
+ title = video['name']
+
+ formats = []
+ for file_ in video['files']:
+ if not isinstance(file_, dict):
+ continue
+ file_url = url_or_none(file_.get('fileUrl'))
+ if not file_url:
+ continue
+ file_size = int_or_none(file_.get('size'))
+ format_id = try_get(
+ file_, lambda x: x['resolution']['label'], compat_str)
+ f = parse_resolution(format_id)
+ f.update({
+ 'url': file_url,
+ 'format_id': format_id,
+ 'filesize': file_size,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ full_description = self._call_api(
+ host, video_id, 'description', note='Downloading description JSON',
+ fatal=False)
+
+ description = None
+ if isinstance(full_description, dict):
+ description = str_or_none(full_description.get('description'))
+ if not description:
+ description = video.get('description')
+
+ subtitles = self.extract_subtitles(host, video_id)
+
+ def data(section, field, type_):
+ return try_get(video, lambda x: x[section][field], type_)
+
+ def account_data(field, type_):
+ return data('account', field, type_)
+
+ def channel_data(field, type_):
+ return data('channel', field, type_)
+
+ category = data('category', 'label', compat_str)
+ categories = [category] if category else None
+
+ nsfw = video.get('nsfw')
+ if isinstance(nsfw, bool):
+ age_limit = 18 if nsfw else 0
+ else:
+ age_limit = None
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': urljoin(url, video.get('thumbnailPath')),
+ 'timestamp': unified_timestamp(video.get('publishedAt')),
+ 'uploader': account_data('displayName', compat_str),
+ 'uploader_id': str_or_none(account_data('id', int)),
+ 'uploader_url': url_or_none(account_data('url', compat_str)),
+ 'channel': channel_data('displayName', compat_str),
+ 'channel_id': str_or_none(channel_data('id', int)),
+ 'channel_url': url_or_none(channel_data('url', compat_str)),
+ 'language': data('language', 'id', compat_str),
+ 'license': data('licence', 'label', compat_str),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('views')),
+ 'like_count': int_or_none(video.get('likes')),
+ 'dislike_count': int_or_none(video.get('dislikes')),
+ 'age_limit': age_limit,
+ 'tags': try_get(video, lambda x: x['tags'], list),
+ 'categories': categories,
+ 'formats': formats,
+ 'subtitles': subtitles
+ }
diff --git a/youtube_dl/extractor/people.py b/youtube_dlc/extractor/people.py
index 6ca95715e..6ca95715e 100644
--- a/youtube_dl/extractor/people.py
+++ b/youtube_dlc/extractor/people.py
diff --git a/youtube_dl/extractor/performgroup.py b/youtube_dlc/extractor/performgroup.py
index 26942bfb3..26942bfb3 100644
--- a/youtube_dl/extractor/performgroup.py
+++ b/youtube_dlc/extractor/performgroup.py
diff --git a/youtube_dlc/extractor/periscope.py b/youtube_dlc/extractor/periscope.py
new file mode 100644
index 000000000..b15906390
--- /dev/null
+++ b/youtube_dlc/extractor/periscope.py
@@ -0,0 +1,189 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+ unescapeHTML,
+)
+
+
+class PeriscopeBaseIE(InfoExtractor):
+ def _call_api(self, method, query, item_id):
+ return self._download_json(
+ 'https://api.periscope.tv/api/v2/%s' % method,
+ item_id, query=query)
+
+ def _parse_broadcast_data(self, broadcast, video_id):
+ title = broadcast.get('status') or 'Periscope Broadcast'
+ uploader = broadcast.get('user_display_name') or broadcast.get('username')
+ title = '%s - %s' % (uploader, title) if uploader else title
+ is_live = (broadcast.get('state') or '').lower() == 'running'
+
+ thumbnails = [{
+ 'url': broadcast[image],
+ } for image in ('image_url', 'image_url_small') if broadcast.get(image)]
+
+ return {
+ 'id': broadcast.get('id') or video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'timestamp': parse_iso8601(broadcast.get('created_at')),
+ 'uploader': uploader,
+ 'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
+ 'thumbnails': thumbnails,
+ 'view_count': int_or_none(broadcast.get('total_watched')),
+ 'tags': broadcast.get('tags'),
+ 'is_live': is_live,
+ }
+
+ @staticmethod
+ def _extract_common_format_info(broadcast):
+ return (broadcast.get('state') or '').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height'))
+
+ @staticmethod
+ def _add_width_and_height(f, width, height):
+ for key, val in (('width', width), ('height', height)):
+ if not f.get(key):
+ f[key] = val
+
+ def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True):
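+ # Finished broadcasts ('ended', 'timed_out') can be fetched with the
+ # native HLS downloader; running streams use ffmpeg's m3u8 protocol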
+ m3u8_formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4',
+ entry_protocol='m3u8_native'
+ if state in ('ended', 'timed_out') else 'm3u8',
+ m3u8_id=format_id, fatal=fatal)
+ if len(m3u8_formats) == 1:
+ self._add_width_and_height(m3u8_formats[0], width, height)
+ return m3u8_formats
+
+
+class PeriscopeIE(PeriscopeBaseIE):
+ IE_DESC = 'Periscope'
+ IE_NAME = 'periscope'
+ _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
+ # Live example URLs can be found at https://www.periscope.tv/
+ _TESTS = [{
+ 'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
+ 'md5': '65b57957972e503fcbbaeed8f4fa04ca',
+ 'info_dict': {
+ 'id': '56102209',
+ 'ext': 'mp4',
+ 'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
+ 'timestamp': 1438978559,
+ 'upload_date': '20150807',
+ 'uploader': 'Bec Boop',
+ 'uploader_id': '1465763',
+ },
+ 'skip': 'Expires in 24 hours',
+ }, {
+ 'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1', webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ token = self._match_id(url)
+
+ stream = self._call_api(
+ 'accessVideoPublic', {'broadcast_id': token}, token)
+
+ broadcast = stream['broadcast']
+ info = self._parse_broadcast_data(broadcast, token)
+
+ state = (broadcast.get('state') or '').lower()
+ width = int_or_none(broadcast.get('width'))
+ height = int_or_none(broadcast.get('height'))
+
+ video_urls = set()
+ formats = []
+ for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
+ video_url = stream.get(format_id + '_url')
+ if not video_url or video_url in video_urls:
+ continue
+ video_urls.add(video_url)
+ if format_id != 'rtmp':
+ m3u8_formats = self._extract_pscp_m3u8_formats(
+ video_url, token, format_id, state, width, height, False)
+ formats.extend(m3u8_formats)
+ continue
+ rtmp_format = {
+ 'url': video_url,
+ 'ext': 'flv' if format_id == 'rtmp' else 'mp4',
+ }
+ self._add_width_and_height(rtmp_format, width, height)
+ formats.append(rtmp_format)
+ self._sort_formats(formats)
+
+ info['formats'] = formats
+ return info
+
+
+class PeriscopeUserIE(PeriscopeBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
+ IE_DESC = 'Periscope user videos'
+ IE_NAME = 'periscope:user'
+
+ _TEST = {
+ 'url': 'https://www.periscope.tv/LularoeHusbandMike/',
+ 'info_dict': {
+ 'id': 'LularoeHusbandMike',
+ 'title': 'LULAROE HUSBAND MIKE',
+ 'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
+ },
+ # Periscope only shows videos in the last 24 hours, so it's possible to
+ # get 0 videos
+ 'playlist_mincount': 0,
+ }
+
+ def _real_extract(self, url):
+ user_name = self._match_id(url)
+
+ webpage = self._download_webpage(url, user_name)
+
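+ # The profile page embeds its state as HTML-escaped JSON in a
+ # data-store attribute; the user record and session token live inside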
+ data_store = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'data-store=(["\'])(?P<data>.+?)\1',
+ webpage, 'data store', default='{}', group='data')),
+ user_name)
+
+ user = list(data_store['UserCache']['users'].values())[0]['user']
+ user_id = user['id']
+ session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']
+
+ broadcasts = self._call_api(
+ 'getUserBroadcastsPublic',
+ {'user_id': user_id, 'session_id': session_id},
+ user_name)['broadcasts']
+
+ broadcast_ids = [
+ broadcast['id'] for broadcast in broadcasts if broadcast.get('id')]
+
+ title = user.get('display_name') or user.get('username') or user_name
+ description = user.get('description')
+
+ entries = [
+ self.url_result(
+ 'https://www.periscope.tv/%s/%s' % (user_name, broadcast_id))
+ for broadcast_id in broadcast_ids]
+
+ return self.playlist_result(entries, user_id, title, description)
diff --git a/youtube_dl/extractor/philharmoniedeparis.py b/youtube_dlc/extractor/philharmoniedeparis.py
index 03da64b11..03da64b11 100644
--- a/youtube_dl/extractor/philharmoniedeparis.py
+++ b/youtube_dlc/extractor/philharmoniedeparis.py
diff --git a/youtube_dlc/extractor/phoenix.py b/youtube_dlc/extractor/phoenix.py
new file mode 100644
index 000000000..8d52ad3b4
--- /dev/null
+++ b/youtube_dlc/extractor/phoenix.py
@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class PhoenixIE(InfoExtractor):
+ IE_NAME = 'phoenix.de'
+ _VALID_URL = r'''https?://(?:www\.)?phoenix\.de/\D+(?P<id>\d+)\.html'''
+ _TESTS = [
+ {
+ 'url': 'https://www.phoenix.de/sendungen/dokumentationen/unsere-welt-in-zukunft---stadt-a-1283620.html',
+ 'md5': '5e765e838aa3531c745a4f5b249ee3e3',
+ 'info_dict': {
+ 'id': '0OB4HFc43Ns',
+ 'ext': 'mp4',
+ 'title': 'Unsere Welt in Zukunft - Stadt',
+ 'description': 'md5:9bfb6fd498814538f953b2dcad7ce044',
+ 'upload_date': '20190912',
+ 'uploader': 'phoenix',
+ 'uploader_id': 'phoenix',
+ }
+ },
+ {
+ 'url': 'https://www.phoenix.de/drohnenangriffe-in-saudi-arabien-a-1286995.html?ref=aktuelles',
+ 'only_matching': True,
+ },
+ # An older page such as https://www.phoenix.de/sendungen/gespraeche/phoenix-persoenlich/im-dialog-a-177727.html
+ # seems not to embed a video, even though it was uploaded to YouTube: https://www.youtube.com/watch?v=4GxnoUHvOkM
+ ]
+
+ def extract_from_json_api(self, video_id, api_url):
+ doc = self._download_json(
+ api_url, video_id,
+ note="Downloading webpage metadata",
+ errnote="Failed to load webpage metadata")
+
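+ # 'absaetze' (paragraphs) lists the page's content blocks; pick the one
+ # that embeds a YouTube video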
+ for a in doc["absaetze"]:
+ if a["typ"] == "video-youtube":
+ return {
+ '_type': 'url_transparent',
+ 'id': a["id"],
+ 'title': doc["titel"],
+ 'url': "https://www.youtube.com/watch?v=%s" % a["id"],
+ 'ie_key': 'Youtube',
+ }
+ raise ExtractorError("No downloadable video found", expected=True)
+
+ def _real_extract(self, url):
+ page_id = self._match_id(url)
+ api_url = 'https://www.phoenix.de/response/id/%s' % page_id
+ return self.extract_from_json_api(page_id, api_url)
diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dlc/extractor/photobucket.py
index 6c8bbe1d9..6c8bbe1d9 100644
--- a/youtube_dl/extractor/photobucket.py
+++ b/youtube_dlc/extractor/photobucket.py
diff --git a/youtube_dl/extractor/picarto.py b/youtube_dlc/extractor/picarto.py
index 8099ef1d6..8099ef1d6 100644
--- a/youtube_dl/extractor/picarto.py
+++ b/youtube_dlc/extractor/picarto.py
diff --git a/youtube_dl/extractor/piksel.py b/youtube_dlc/extractor/piksel.py
index 88b6859b0..88b6859b0 100644
--- a/youtube_dl/extractor/piksel.py
+++ b/youtube_dlc/extractor/piksel.py
diff --git a/youtube_dl/extractor/pinkbike.py b/youtube_dlc/extractor/pinkbike.py
index 9f3501f77..9f3501f77 100644
--- a/youtube_dl/extractor/pinkbike.py
+++ b/youtube_dlc/extractor/pinkbike.py
diff --git a/youtube_dl/extractor/pladform.py b/youtube_dlc/extractor/pladform.py
index e86c65396..e86c65396 100644
--- a/youtube_dl/extractor/pladform.py
+++ b/youtube_dlc/extractor/pladform.py
diff --git a/youtube_dlc/extractor/platzi.py b/youtube_dlc/extractor/platzi.py
new file mode 100644
index 000000000..23c8256b5
--- /dev/null
+++ b/youtube_dlc/extractor/platzi.py
@@ -0,0 +1,224 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ try_get,
+ url_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class PlatziBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://platzi.com/login/'
+ _NETRC_MACHINE = 'platzi'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'email': username,
+ 'password': password,
+ })
+
+ urlh = self._request_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={'Referer': self._LOGIN_URL})
+
+ # if we are no longer on the login page, login succeeded
+ if 'platzi.com/login' not in urlh.geturl():
+ return
+
+ login_error = self._webpage_read_content(
+ urlh, self._LOGIN_URL, None, 'Downloading login error page')
+
+ login = self._parse_json(
+ self._search_regex(
+ r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'),
+ None)
+
+ for kind in ('error', 'password', 'nonFields'):
+ error = str_or_none(login.get('%sError' % kind))
+ if error:
+ raise ExtractorError(
+ 'Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class PlatziIE(PlatziBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ platzi\.com/clases| # es version
+ courses\.platzi\.com/classes # en version
+ )/[^/]+/(?P<id>\d+)-[^/?\#&]+
+ '''
+
+ _TESTS = [{
+ 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
+ 'md5': '8f56448241005b561c10f11a595b37e3',
+ 'info_dict': {
+ 'id': '12074',
+ 'ext': 'mp4',
+ 'title': 'Creando nuestra primera página',
+ 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
+ 'duration': 420,
+ },
+ 'skip': 'Requires platzi account credentials',
+ }, {
+ 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
+ 'info_dict': {
+ 'id': '13430',
+ 'ext': 'mp4',
+ 'title': 'Background',
+ 'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
+ 'duration': 360,
+ },
+ 'skip': 'Requires platzi account credentials',
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ lecture_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, lecture_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ # client_data may contain "};" so that we have to try more
+ # strict regex first
+ (r'client_data\s*=\s*({.+?})\s*;\s*\n',
+ r'client_data\s*=\s*({.+?})\s*;'),
+ webpage, 'client data'),
+ lecture_id)
+
+ material = data['initialState']['material']
+ desc = material['description']
+ title = desc['title']
+
+ formats = []
+ for server_id, server in material['videos'].items():
+ if not isinstance(server, dict):
+ continue
+ for format_id in ('hls', 'dash'):
+ format_url = url_or_none(server.get(format_id))
+ if not format_url:
+ continue
+ if format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, lecture_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id=format_id,
+ note='Downloading %s m3u8 information' % server_id,
+ fatal=False))
+ elif format_id == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ format_url, lecture_id, mpd_id=format_id,
+ note='Downloading %s MPD manifest' % server_id,
+ fatal=False))
+ self._sort_formats(formats)
+
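+ # The lecture description is shipped as base64-encoded HTML; decode it
+ # and strip the markup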
+ content = str_or_none(desc.get('content'))
+ description = (clean_html(compat_b64decode(content).decode('utf-8'))
+ if content else None)
+ duration = int_or_none(material.get('duration'), invscale=60)
+
+ return {
+ 'id': lecture_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'formats': formats,
+ }
+
+
+class PlatziCourseIE(PlatziBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ platzi\.com/clases| # es version
+ courses\.platzi\.com/classes # en version
+ )/(?P<id>[^/?\#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://platzi.com/clases/next-js/',
+ 'info_dict': {
+ 'id': '1311',
+ 'title': 'Curso de Next.js',
+ },
+ 'playlist_count': 22,
+ }, {
+ 'url': 'https://courses.platzi.com/classes/communication-codestream/',
+ 'info_dict': {
+ 'id': '1367',
+ 'title': 'Codestream Course',
+ },
+ 'playlist_count': 14,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if PlatziIE.suitable(url) else super(PlatziCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_name = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_name)
+
+ props = self._parse_json(
+ self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
+ course_name)['initialProps']
+
+ entries = []
+ for chapter_num, chapter in enumerate(props['concepts'], 1):
+ if not isinstance(chapter, dict):
+ continue
+ materials = chapter.get('materials')
+ if not materials or not isinstance(materials, list):
+ continue
+ chapter_title = chapter.get('title')
+ chapter_id = str_or_none(chapter.get('id'))
+ for material in materials:
+ if not isinstance(material, dict):
+ continue
+ if material.get('material_type') != 'video':
+ continue
+ video_url = urljoin(url, material.get('url'))
+ if not video_url:
+ continue
+ entries.append({
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'title': str_or_none(material.get('name')),
+ 'id': str_or_none(material.get('id')),
+ 'ie_key': PlatziIE.ie_key(),
+ 'chapter': chapter_title,
+ 'chapter_number': chapter_num,
+ 'chapter_id': chapter_id,
+ })
+
+ course_id = compat_str(try_get(props, lambda x: x['course']['id']))
+ course_title = try_get(props, lambda x: x['course']['name'], compat_str)
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/playfm.py b/youtube_dlc/extractor/playfm.py
index e766ccca3..e766ccca3 100644
--- a/youtube_dl/extractor/playfm.py
+++ b/youtube_dlc/extractor/playfm.py
diff --git a/youtube_dl/extractor/playplustv.py b/youtube_dlc/extractor/playplustv.py
index 1e30ab23a..1e30ab23a 100644
--- a/youtube_dl/extractor/playplustv.py
+++ b/youtube_dlc/extractor/playplustv.py
diff --git a/youtube_dl/extractor/plays.py b/youtube_dlc/extractor/plays.py
index ddfc6f148..ddfc6f148 100644
--- a/youtube_dl/extractor/plays.py
+++ b/youtube_dlc/extractor/plays.py
diff --git a/youtube_dl/extractor/playtvak.py b/youtube_dlc/extractor/playtvak.py
index 4c5f57919..4c5f57919 100644
--- a/youtube_dl/extractor/playtvak.py
+++ b/youtube_dlc/extractor/playtvak.py
diff --git a/youtube_dl/extractor/playvid.py b/youtube_dlc/extractor/playvid.py
index 4aef186ea..4aef186ea 100644
--- a/youtube_dl/extractor/playvid.py
+++ b/youtube_dlc/extractor/playvid.py
diff --git a/youtube_dl/extractor/playwire.py b/youtube_dlc/extractor/playwire.py
index 4d96a10a7..4d96a10a7 100644
--- a/youtube_dl/extractor/playwire.py
+++ b/youtube_dlc/extractor/playwire.py
diff --git a/youtube_dl/extractor/pluralsight.py b/youtube_dlc/extractor/pluralsight.py
index abd08bc28..abd08bc28 100644
--- a/youtube_dl/extractor/pluralsight.py
+++ b/youtube_dlc/extractor/pluralsight.py
diff --git a/youtube_dl/extractor/podomatic.py b/youtube_dlc/extractor/podomatic.py
index e782e3f1f..e782e3f1f 100644
--- a/youtube_dl/extractor/podomatic.py
+++ b/youtube_dlc/extractor/podomatic.py
diff --git a/youtube_dlc/extractor/pokemon.py b/youtube_dlc/extractor/pokemon.py
new file mode 100644
index 000000000..14ee1a72e
--- /dev/null
+++ b/youtube_dlc/extractor/pokemon.py
@@ -0,0 +1,138 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ extract_attributes,
+ int_or_none,
+ js_to_json,
+ merge_dicts,
+)
+
+
+class PokemonIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))'
+ _TESTS = [{
+ 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/',
+ 'md5': '2fe8eaec69768b25ef898cda9c43062e',
+ 'info_dict': {
+ 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4',
+ 'ext': 'mp4',
+ 'title': 'The Ol’ Raise and Switch!',
+ 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af',
+ },
+ 'add_ie': ['LimelightMedia'],
+ }, {
+ # no data-video-title
+ 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008',
+ 'info_dict': {
+ 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1',
+ 'ext': 'mp4',
+ 'title': "Pokémon : L'ascension de Darkrai",
+ 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5',
+ },
+ 'add_ie': ['LimelightMedia'],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id, display_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, video_id or display_id)
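+ # The player element's data-video-* attributes carry the id, title,
+ # season and episode metadata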
+ video_data = extract_attributes(self._search_regex(
+ r'(<[^>]+data-video-id="%s"[^>]*>)' % (video_id if video_id else '[a-z0-9]{32}'),
+ webpage, 'video data element'))
+ video_id = video_data['data-video-id']
+ title = video_data.get('data-video-title') or self._html_search_meta(
+ 'pkm-title', webpage, 'title', default=None) or self._search_regex(
+ r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title')
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'limelight:media:%s' % video_id,
+ 'title': title,
+ 'description': video_data.get('data-video-summary'),
+ 'thumbnail': video_data.get('data-video-poster'),
+ 'series': 'Pokémon',
+ 'season_number': int_or_none(video_data.get('data-video-season')),
+ 'episode': title,
+ 'episode_number': int_or_none(video_data.get('data-video-episode')),
+ 'ie_key': 'LimelightMedia',
+ }
+
+
+class PokemonWatchIE(InfoExtractor):
+ _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/player\.html\?id=(?P<id>[a-z0-9]{32})'
+ _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}'
+ _TESTS = [{
+ 'url': 'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667',
+ 'md5': '62833938a31e61ab49ada92f524c42ff',
+ 'info_dict': {
+ 'id': '8309a40969894a8e8d5bc1311e9c5667',
+ 'ext': 'mp4',
+ 'title': 'Lillier and the Staff!',
+ 'description': 'md5:338841b8c21b283d24bdc9b568849f04',
+ }
+ }, {
+ 'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07',
+ 'only_matching': True
+ }]
+
+ def _extract_media(self, channel_array, video_id):
+ for channel in channel_array:
+ for media in channel.get('media') or []:
+ if media.get('id') == video_id:
+ return media
+ return None
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = {
+ '_type': 'url',
+ 'id': video_id,
+ 'url': 'limelight:media:%s' % video_id,
+ 'ie_key': 'LimelightMedia',
+ }
+
+ # API call can be avoided entirely if we are listing formats
+ if self._downloader.params.get('listformats', False):
+ return info
+
+ webpage = self._download_webpage(url, video_id)
+ build_vars = self._parse_json(self._search_regex(
+ r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'),
+ video_id, transform_source=js_to_json)
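+ # buildVars is a JS object literal rather than strict JSON; js_to_json
+ # makes it parseable and provides the region for the channel API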
+ region = build_vars.get('region')
+ channel_array = self._download_json(self._API_URL.format(region), video_id)
+ video_data = self._extract_media(channel_array, video_id)
+
+ if video_data is None:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ info['_type'] = 'url_transparent'
+ images = video_data.get('images') or {}
+
+ return merge_dicts(info, {
+ 'title': video_data.get('title'),
+ 'description': video_data.get('description'),
+ 'thumbnail': images.get('medium') or images.get('small'),
+ 'series': 'Pokémon',
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode': video_data.get('title'),
+ 'episode_number': int_or_none(video_data.get('episode')),
+ })
diff --git a/youtube_dl/extractor/polskieradio.py b/youtube_dlc/extractor/polskieradio.py
index 978d6f813..978d6f813 100644
--- a/youtube_dl/extractor/polskieradio.py
+++ b/youtube_dlc/extractor/polskieradio.py
diff --git a/youtube_dlc/extractor/popcorntimes.py b/youtube_dlc/extractor/popcorntimes.py
new file mode 100644
index 000000000..7bf7f9858
--- /dev/null
+++ b/youtube_dlc/extractor/popcorntimes.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_chr,
+)
+from ..utils import int_or_none
+
+
+class PopcorntimesIE(InfoExtractor):
+ _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
+ 'md5': '93f210991ad94ba8c3485950a2453257',
+ 'info_dict': {
+ 'id': 'A1XCFvz',
+ 'display_id': 'haensel-und-gretel-opera-fantasy',
+ 'ext': 'mp4',
+ 'title': 'Hänsel und Gretel',
+ 'description': 'md5:1b8146791726342e7b22ce8125cf6945',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'creator': 'John Paul',
+ 'release_date': '19541009',
+ 'duration': 4260,
+ 'tbr': 5380,
+ 'width': 720,
+ 'height': 540,
+ },
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, display_id = mobj.group('id', 'display_id')
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._search_regex(
+ r'<h1>([^<]+)', webpage, 'title',
+ default=None) or self._html_search_meta(
+ 'ya:ovs:original_name', webpage, 'title', fatal=True)
+
+ loc = self._search_regex(
+ r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
+ group='value')
+
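+ # PCTMLOC is base64 obfuscated with ROT13 over letters only; rotate
+ # each alphabetic character back before base64-decoding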
+ loc_b64 = ''
+ for c in loc:
+ c_ord = ord(c)
+ if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
+ upper = ord('Z') if c_ord <= ord('Z') else ord('z')
+ c_ord += 13
+ if upper < c_ord:
+ c_ord -= 26
+ loc_b64 += compat_chr(c_ord)
+
+ video_url = compat_b64decode(loc_b64).decode('utf-8')
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ thumbnail = self._search_regex(
+ r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'thumbnail', default=None,
+ group='value') or self._og_search_thumbnail(webpage)
+
+ creator = self._html_search_meta(
+ 'video:director', webpage, 'creator', default=None)
+
+ release_date = self._html_search_meta(
+ 'video:release_date', webpage, default=None)
+ if release_date:
+ release_date = release_date.replace('-', '')
+
+ def int_meta(name):
+ return int_or_none(self._html_search_meta(
+ name, webpage, default=None))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'url': video_url,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'creator': creator,
+ 'release_date': release_date,
+ 'duration': int_meta('video:duration'),
+ 'tbr': int_meta('ya:ovs:bitrate'),
+ 'width': int_meta('og:video:width'),
+ 'height': int_meta('og:video:height'),
+ 'http_headers': {
+ 'Referer': url,
+ },
+ }
diff --git a/youtube_dl/extractor/popcorntv.py b/youtube_dlc/extractor/popcorntv.py
index 9f834fb6c..9f834fb6c 100644
--- a/youtube_dl/extractor/popcorntv.py
+++ b/youtube_dlc/extractor/popcorntv.py
diff --git a/youtube_dl/extractor/porn91.py b/youtube_dlc/extractor/porn91.py
index 20eac647a..20eac647a 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dlc/extractor/porn91.py
diff --git a/youtube_dl/extractor/porncom.py b/youtube_dlc/extractor/porncom.py
index 5726cab3a..5726cab3a 100644
--- a/youtube_dl/extractor/porncom.py
+++ b/youtube_dlc/extractor/porncom.py
diff --git a/youtube_dlc/extractor/pornhd.py b/youtube_dlc/extractor/pornhd.py
new file mode 100644
index 000000000..c6052ac9f
--- /dev/null
+++ b/youtube_dlc/extractor/pornhd.py
@@ -0,0 +1,121 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ merge_dicts,
+ urljoin,
+)
+
+
+class PornHdIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?pornhd\.com/(?:[a-z]{2,4}/)?videos/(?P<id>\d+)(?:/(?P<display_id>.+))?'
+ _TESTS = [{
+ 'url': 'http://www.pornhd.com/videos/9864/selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
+ 'md5': '87f1540746c1d32ec7a2305c12b96b25',
+ 'info_dict': {
+ 'id': '9864',
+ 'display_id': 'selfie-restroom-masturbation-fun-with-chubby-cutie-hd-porn-video',
+ 'ext': 'mp4',
+ 'title': 'Restroom selfie masturbation',
+ 'description': 'md5:3748420395e03e31ac96857a8f125b2b',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 18,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+ 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de',
+ 'info_dict': {
+ 'id': '1962',
+ 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video',
+ 'ext': 'mp4',
+ 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759',
+ 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'view_count': int,
+ 'like_count': int,
+ 'age_limit': 18,
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ webpage = self._download_webpage(url, display_id or video_id)
+
+ title = self._html_search_regex(
+ [r'<span[^>]+class=["\']video-name["\'][^>]*>([^<]+)',
+ r'<title>(.+?) - .*?[Pp]ornHD.*?</title>'], webpage, 'title')
+
+ sources = self._parse_json(js_to_json(self._search_regex(
+ r"(?s)sources'?\s*[:=]\s*(\{.+?\})",
+ webpage, 'sources', default='{}')), video_id)
+
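+ # If the page carries no sources object, fall back to plain HTML5
+ # <video>/<source> tags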
+ info = {}
+ if not sources:
+ entries = self._parse_html5_media_entries(url, webpage, video_id)
+ if entries:
+ info = entries[0]
+
+ if not sources and not info:
+ message = self._html_search_regex(
+ r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1',
+ webpage, 'error message', group='value')
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+
+ formats = []
+ for format_id, video_url in sources.items():
+ video_url = urljoin(url, video_url)
+ if not video_url:
+ continue
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id, 'height', default=None))
+ formats.append({
+ 'url': video_url,
+ 'ext': determine_ext(video_url, 'mp4'),
+ 'format_id': format_id,
+ 'height': height,
+ })
+ if formats:
+ info['formats'] = formats
+ self._sort_formats(info.get('formats') or [])
+
+ description = self._html_search_regex(
+ (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>',
+ r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'),
+ webpage, 'description', fatal=False,
+ group='value') or self._html_search_meta(
+ 'description', webpage, default=None) or self._og_search_description(webpage)
+ view_count = int_or_none(self._html_search_regex(
+ r'(\d+) views\s*<', webpage, 'view count', fatal=False))
+ thumbnail = self._search_regex(
+ r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage,
+ 'thumbnail', default=None, group='url')
+
+ like_count = int_or_none(self._search_regex(
+ (r'(\d+)</span>\s*likes',
+ r'(\d+)\s*</11[^>]+>(?:&nbsp;|\s)*\blikes',
+ r'class=["\']save-count["\'][^>]*>\s*(\d+)'),
+ webpage, 'like count', fatal=False))
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ })
diff --git a/youtube_dlc/extractor/pornhub.py b/youtube_dlc/extractor/pornhub.py
new file mode 100644
index 000000000..3567a3283
--- /dev/null
+++ b/youtube_dlc/extractor/pornhub.py
@@ -0,0 +1,611 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import itertools
+import operator
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+ compat_urllib_request,
+)
+from .openload import PhantomJSwrapper
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ NO_DEFAULT,
+ orderedSet,
+ remove_quotes,
+ str_to_int,
+ url_or_none,
+)
+
+
+class PornHubBaseIE(InfoExtractor):
+ def _download_webpage_handle(self, *args, **kwargs):
+ def dl(*args, **kwargs):
+ return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)
+
+ webpage, urlh = dl(*args, **kwargs)
+
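+ # These markers appear on PornHub's JavaScript anti-bot interstitial;
+ # PhantomJS executes the page (presumably setting the RNKEY cookie) and
+ # the original request is then retried.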
+ if any(re.search(p, webpage) for p in (
+ r'<body\b[^>]+\bonload=["\']go\(\)',
+ r'document\.cookie\s*=\s*["\']RNKEY=',
+ r'document\.location\.reload\(true\)')):
+ url_or_request = args[0]
+ url = (url_or_request.get_full_url()
+ if isinstance(url_or_request, compat_urllib_request.Request)
+ else url_or_request)
+ phantom = PhantomJSwrapper(self, required_version='2.0')
+ phantom.get(url, html=webpage)
+ webpage, urlh = dl(*args, **kwargs)
+
+ return webpage, urlh
+
+
+class PornHubIE(PornHubBaseIE):
+ IE_DESC = 'PornHub and Thumbzilla'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
+ (?:www\.)?thumbzilla\.com/video/
+ )
+ (?P<id>[\da-z]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
+ 'md5': '1e19b41231a02eba417839222ac9d58e',
+ 'info_dict': {
+ 'id': '648719015',
+ 'ext': 'mp4',
+ 'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
+ 'uploader': 'Babes',
+ 'upload_date': '20130628',
+ 'duration': 361,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': list,
+ 'categories': list,
+ },
+ }, {
+ # non-ASCII title
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
+ 'info_dict': {
+ 'id': '1331683002',
+ 'ext': 'mp4',
+ 'title': '重庆婷婷女王足交',
+ 'uploader': 'Unknown',
+ 'upload_date': '20150213',
+ 'duration': 1753,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': list,
+ 'categories': list,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # subtitles
+ 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
+ 'info_dict': {
+ 'id': 'ph5af5fef7c2aa7',
+ 'ext': 'mp4',
+ 'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
+ 'uploader': 'BFFs',
+ 'duration': 622,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'tags': list,
+ 'categories': list,
+ 'subtitles': {
+ 'en': [{
+ "ext": 'srt'
+ }]
+ },
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
+ 'only_matching': True,
+ }, {
+ # removed at the request of cam4.com
+ 'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
+ 'only_matching': True,
+ }, {
+ # removed at the request of the copyright owner
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
+ 'only_matching': True,
+ }, {
+ # removed by uploader
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
+ 'only_matching': True,
+ }, {
+ # private video
+ 'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)',
+ webpage)
+
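+ # str_to_int tolerates thousand separators, so a matched group such as
+ # '1,234,567' (illustrative value) becomes 1234567.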
+ def _extract_count(self, pattern, webpage, name):
+ return str_to_int(self._search_regex(
+ pattern, webpage, '%s count' % name, fatal=False))
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host') or 'pornhub.com'
+ video_id = mobj.group('id')
+
+ if 'premium' in host:
+ if not self._downloader.params.get('cookiefile'):
+ raise ExtractorError(
+ 'PornHub Premium requires authentication.'
+ ' You may want to use --cookies.',
+ expected=True)
+
+ self._set_cookie(host, 'age_verified', '1')
+
+ def dl_webpage(platform):
+ self._set_cookie(host, 'platform', platform)
+ return self._download_webpage(
+ 'https://www.%s/view_video.php?viewkey=%s' % (host, video_id),
+ video_id, 'Downloading %s webpage' % platform)
+
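+ # The 'platform' cookie switches which markup variant is served; the 'pc'
+ # page is parsed first, and the lighter 'tv' page is only fetched further
+ # below as a fallback source for the media URL.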
+ webpage = dl_webpage('pc')
+
+ error_msg = self._html_search_regex(
+ r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+ webpage, 'error message', default=None, group='error')
+ if error_msg:
+ error_msg = re.sub(r'\s+', ' ', error_msg)
+ raise ExtractorError(
+ 'PornHub said: %s' % error_msg,
+ expected=True, video_id=video_id)
+
+ # video_title from flashvars contains whitespace instead of non-ASCII
+ # characters (see http://www.pornhub.com/view_video.php?viewkey=1331683002),
+ # so it is no longer relied upon.
+ title = self._html_search_meta(
+ 'twitter:title', webpage, default=None) or self._html_search_regex(
+ (r'(?s)<h1[^>]+class=["\']title["\'][^>]*>(?P<title>.+?)</h1>',
+ r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'),
+ webpage, 'title', group='title')
+
+ video_urls = []
+ video_urls_set = set()
+ subtitles = {}
+
+ flashvars = self._parse_json(
+ self._search_regex(
+ r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'),
+ video_id)
+ if flashvars:
+ subtitle_url = url_or_none(flashvars.get('closedCaptionsFile'))
+ if subtitle_url:
+ subtitles.setdefault('en', []).append({
+ 'url': subtitle_url,
+ 'ext': 'srt',
+ })
+ thumbnail = flashvars.get('image_url')
+ duration = int_or_none(flashvars.get('video_duration'))
+ media_definitions = flashvars.get('mediaDefinitions')
+ if isinstance(media_definitions, list):
+ for definition in media_definitions:
+ if not isinstance(definition, dict):
+ continue
+ video_url = definition.get('videoUrl')
+ if not video_url or not isinstance(video_url, compat_str):
+ continue
+ if video_url in video_urls_set:
+ continue
+ video_urls_set.add(video_url)
+ video_urls.append(
+ (video_url, int_or_none(definition.get('quality'))))
+ else:
+ thumbnail, duration = [None] * 2
+
+ def extract_js_vars(webpage, pattern, default=NO_DEFAULT):
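+ # Folds simple obfuscated JS assignments like (hypothetical input)
+ #   var media_0="https://cv.ph"+"ncdn.com/vid"+"eo.mp4";
+ # into {'media_0': 'https://cv.phncdn.com/video.mp4'}: quotes are
+ # stripped and '+'-concatenations resolved, reusing earlier variables.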
+ assignments = self._search_regex(
+ pattern, webpage, 'encoded url', default=default)
+ if not assignments:
+ return {}
+
+ assignments = assignments.split(';')
+
+ js_vars = {}
+
+ def parse_js_value(inp):
+ inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp)
+ if '+' in inp:
+ inps = inp.split('+')
+ return functools.reduce(
+ operator.concat, map(parse_js_value, inps))
+ inp = inp.strip()
+ if inp in js_vars:
+ return js_vars[inp]
+ return remove_quotes(inp)
+
+ for assn in assignments:
+ assn = assn.strip()
+ if not assn:
+ continue
+ assn = re.sub(r'var\s+', '', assn)
+ vname, value = assn.split('=', 1)
+ js_vars[vname] = parse_js_value(value)
+ return js_vars
+
+ def add_video_url(video_url):
+ v_url = url_or_none(video_url)
+ if not v_url:
+ return
+ if v_url in video_urls_set:
+ return
+ video_urls.append((v_url, None))
+ video_urls_set.add(v_url)
+
+ if not video_urls:
+ FORMAT_PREFIXES = ('media', 'quality')
+ js_vars = extract_js_vars(
+ webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
+ default=None)
+ if js_vars:
+ for key, format_url in js_vars.items():
+ if any(key.startswith(p) for p in FORMAT_PREFIXES):
+ add_video_url(format_url)
+ if not video_urls and re.search(
+ r'<[^>]+\bid=["\']lockedPlayer', webpage):
+ raise ExtractorError(
+ 'Video %s is locked' % video_id, expected=True)
+
+ if not video_urls:
+ js_vars = extract_js_vars(
+ dl_webpage('tv'), r'(var.+?mediastring.+?)</script>')
+ add_video_url(js_vars['mediastring'])
+
+ for mobj in re.finditer(
+ r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ webpage):
+ video_url = mobj.group('url')
+ if video_url not in video_urls_set:
+ video_urls.append((video_url, None))
+ video_urls_set.add(video_url)
+
+ upload_date = None
+ formats = []
+ for video_url, height in video_urls:
+ if not upload_date:
+ upload_date = self._search_regex(
+ r'/(\d{6}/\d{2})/', video_url, 'upload date', default=None)
+ if upload_date:
+ upload_date = upload_date.replace('/', '')
+ ext = determine_ext(video_url)
+ if ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id='dash', fatal=False))
+ continue
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ tbr = None
+ mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
+ if mobj:
+ if not height:
+ height = int(mobj.group('height'))
+ tbr = int(mobj.group('tbr'))
+ formats.append({
+ 'url': video_url,
+ 'format_id': '%dp' % height if height else None,
+ 'height': height,
+ 'tbr': tbr,
+ })
+ self._sort_formats(formats)
+
+ video_uploader = self._html_search_regex(
+ r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
+ webpage, 'uploader', fatal=False)
+
+ view_count = self._extract_count(
+ r'<span class="count">([\d,\.]+)</span> views', webpage, 'view')
+ like_count = self._extract_count(
+ r'<span class="votesUp">([\d,\.]+)</span>', webpage, 'like')
+ dislike_count = self._extract_count(
+ r'<span class="votesDown">([\d,\.]+)</span>', webpage, 'dislike')
+ comment_count = self._extract_count(
+ r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')
+
+ def extract_list(meta_key):
+ div = self._search_regex(
+ r'(?s)<div[^>]+\bclass=["\'].*?\b%sWrapper[^>]*>(.+?)</div>'
+ % meta_key, webpage, meta_key, default=None)
+ if div:
+ return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
+
+ return {
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'upload_date': upload_date,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ 'age_limit': 18,
+ 'tags': extract_list('tags'),
+ 'categories': extract_list('categories'),
+ 'subtitles': subtitles,
+ }
+
+
+class PornHubPlaylistBaseIE(PornHubBaseIE):
+ def _extract_entries(self, webpage, host):
+ # Only process the container div holding the main playlist content,
+ # skipping the drop-down menu that uses a similar pattern for videos
+ # (see https://github.com/ytdl-org/youtube-dl/issues/11594).
+ container = self._search_regex(
+ r'(?s)(<div[^>]+class=["\']container.+)', webpage,
+ 'container', default=webpage)
+
+ return [
+ self.url_result(
+ 'http://www.%s/%s' % (host, video_url),
+ PornHubIE.ie_key(), video_title=title)
+ for video_url, title in orderedSet(re.findall(
+ r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
+ container))
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ playlist_id = mobj.group('id')
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ entries = self._extract_entries(webpage, host)
+
+ playlist = self._parse_json(
+ self._search_regex(
+ r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
+ 'playlist', default='{}'),
+ playlist_id, fatal=False) or {}  # _parse_json may return None here
+ title = playlist.get('title') or self._search_regex(
+ r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)
+
+ return self.playlist_result(
+ entries, playlist_id, title, playlist.get('description'))
+
+
+class PornHubUserIE(PornHubPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)'
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/model/zoe_ph',
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious',
+ 'info_dict': {
+ 'id': 'liz-vicious',
+ },
+ 'playlist_mincount': 118,
+ }, {
+ 'url': 'https://www.pornhub.com/users/russianveet69',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/channels/povd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ user_id = mobj.group('id')
+ return self.url_result(
+ '%s/videos' % mobj.group('url'), ie=PornHubPagedVideoListIE.ie_key(),
+ video_id=user_id)
+
+
+class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE):
+ @staticmethod
+ def _has_more(webpage):
+ return re.search(
+ r'''(?x)
+ <li[^>]+\bclass=["\']page_next|
+ <link[^>]+\brel=["\']next|
+ <button[^>]+\bid=["\']moreDataBtn
+ ''', webpage) is not None
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ item_id = mobj.group('id')
+
+ page = int_or_none(self._search_regex(
+ r'\bpage=(\d+)', url, 'page', default=None))
+
+ entries = []
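+ # If the URL pins a page (e.g. ?page=3), only that page is fetched;
+ # otherwise pages are crawled from 1 until a 404, an empty page or a
+ # missing "next" marker ends the loop.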
+ for page_num in (page, ) if page is not None else itertools.count(1):
+ try:
+ webpage = self._download_webpage(
+ url, item_id, 'Downloading page %d' % page_num,
+ query={'page': page_num})
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+ break
+ raise
+ page_entries = self._extract_entries(webpage, host)
+ if not page_entries:
+ break
+ entries.extend(page_entries)
+ if not self._has_more(webpage):
+ break
+
+ return self.playlist_result(orderedSet(entries), item_id)
+
+
+class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/users/rushandlia/videos',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos',
+ 'info_dict': {
+ 'id': 'pornstar/jenny-blighe/videos',
+ },
+ 'playlist_mincount': 149,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3',
+ 'info_dict': {
+ 'id': 'pornstar/jenny-blighe/videos',
+ },
+ 'playlist_mincount': 40,
+ }, {
+ # default sorting as Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos',
+ 'info_dict': {
+ 'id': 'channels/povd/videos',
+ },
+ 'playlist_mincount': 293,
+ }, {
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
+ 'only_matching': True,
+ }, {
+ # Most Recent Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
+ 'only_matching': True,
+ }, {
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
+ 'only_matching': True,
+ }, {
+ # Most Viewed Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv',
+ 'only_matching': True,
+ }, {
+ # Top Rated Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr',
+ 'only_matching': True,
+ }, {
+ # Longest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg',
+ 'only_matching': True,
+ }, {
+ # Newest Videos
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video/search?search=123',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/categories/teen',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/categories/teen?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/hd',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/hd?page=3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/described-video',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/described-video?page=2',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.pornhub.com/playlist/44121572',
+ 'info_dict': {
+ 'id': 'playlist/44121572',
+ },
+ 'playlist_mincount': 132,
+ }, {
+ 'url': 'https://www.pornhub.com/playlist/4667351',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://de.pornhub.com/playlist/4667351',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url)
+ else super(PornHubPagedVideoListIE, cls).suitable(url))
+
+
+class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE):
+ _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)'
+ _TESTS = [{
+ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
+ 'info_dict': {
+ 'id': 'jenny-blighe',
+ },
+ 'playlist_mincount': 129,
+ }, {
+ 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload',
+ 'only_matching': True,
+ }]
diff --git a/youtube_dl/extractor/pornotube.py b/youtube_dlc/extractor/pornotube.py
index 1b5b9a320..1b5b9a320 100644
--- a/youtube_dl/extractor/pornotube.py
+++ b/youtube_dlc/extractor/pornotube.py
diff --git a/youtube_dl/extractor/pornovoisines.py b/youtube_dlc/extractor/pornovoisines.py
index b6b71069d..b6b71069d 100644
--- a/youtube_dl/extractor/pornovoisines.py
+++ b/youtube_dlc/extractor/pornovoisines.py
diff --git a/youtube_dl/extractor/pornoxo.py b/youtube_dlc/extractor/pornoxo.py
index 2831368b6..2831368b6 100644
--- a/youtube_dl/extractor/pornoxo.py
+++ b/youtube_dlc/extractor/pornoxo.py
diff --git a/youtube_dl/extractor/presstv.py b/youtube_dlc/extractor/presstv.py
index b5c279203..b5c279203 100644
--- a/youtube_dl/extractor/presstv.py
+++ b/youtube_dlc/extractor/presstv.py
diff --git a/youtube_dlc/extractor/prosiebensat1.py b/youtube_dlc/extractor/prosiebensat1.py
new file mode 100644
index 000000000..e47088292
--- /dev/null
+++ b/youtube_dlc/extractor/prosiebensat1.py
@@ -0,0 +1,500 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from hashlib import sha1
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ unified_strdate,
+)
+
+
+class ProSiebenSat1BaseIE(InfoExtractor):
+ _GEO_BYPASS = False
+ _ACCESS_ID = None
+ _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
+ _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'
+
+ def _extract_video_info(self, url, clip_id):
+ client_location = url
+
+ video = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos',
+ clip_id, 'Downloading videos JSON', query={
+ 'access_token': self._TOKEN,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ 'ids': clip_id,
+ })[0]
+
+ if video.get('is_protected') is True:
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ formats = []
+ if self._ACCESS_ID:
+ raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
+ protocols = self._download_json(
+ self._V4_BASE_URL + 'protocols', clip_id,
+ 'Downloading protocols JSON',
+ headers=self.geo_verification_headers(), query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct).encode()).hexdigest(),
+ 'video_id': clip_id,
+ }, fatal=False, expected_status=(403,)) or {}
+ error = protocols.get('error') or {}
+ if error.get('title') == 'Geo check failed':
+ self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
+ server_token = protocols.get('server_token')
+ if server_token:
+ urls = (self._download_json(
+ self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
+ 'access_id': self._ACCESS_ID,
+ 'client_token': sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
+ 'protocols': self._SUPPORTED_PROTOCOLS,
+ 'server_token': server_token,
+ 'video_id': clip_id,
+ }, fatal=False) or {}).get('urls') or {}
+ for protocol, variant in urls.items():
+ source_url = variant.get('clear', {}).get('url')
+ if not source_url:
+ continue
+ if protocol == 'dash':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id=protocol, fatal=False))
+ elif protocol == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id=protocol, fatal=False))
+ else:
+ formats.append({
+ 'url': source_url,
+ 'format_id': protocol,
+ })
+ if not formats:
+ source_ids = [compat_str(source['id']) for source in video['sources']]
+
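+ # The client_id is the first two salt characters followed by the hex
+ # SHA-1 of the concatenated request parameters (presumably re-computed
+ # and validated server-side), so it varies per clip and client location.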
+ client_id = self._SALT[:2] + sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+
+ sources = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources' % clip_id,
+ clip_id, 'Downloading sources JSON', query={
+ 'access_token': self._TOKEN,
+ 'client_id': client_id,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ })
+ server_id = sources['server_id']
+
+ def fix_bitrate(bitrate):
+ bitrate = int_or_none(bitrate)
+ if not bitrate:
+ return None
+ return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate
+
+ for source_id in source_ids:
+ client_id = self._SALT[:2] + sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode('utf-8')).hexdigest()
+ urls = self._download_json(
+ 'http://vas.sim-technik.de/vas/live/v2/videos/%s/sources/url' % clip_id,
+ clip_id, 'Downloading urls JSON', fatal=False, query={
+ 'access_token': self._TOKEN,
+ 'client_id': client_id,
+ 'client_location': client_location,
+ 'client_name': self._CLIENT_NAME,
+ 'server_id': server_id,
+ 'source_ids': source_id,
+ })
+ if not urls:
+ continue
+ if urls.get('status_code') != 0:
+ raise ExtractorError('This video is unavailable', expected=True)
+ urls_sources = urls['sources']
+ if isinstance(urls_sources, dict):
+ urls_sources = urls_sources.values()
+ for source in urls_sources:
+ source_url = source.get('url')
+ if not source_url:
+ continue
+ protocol = source.get('protocol')
+ mimetype = source.get('mimetype')
+ if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ source_url, clip_id, f4m_id='hds', fatal=False))
+ elif mimetype == 'application/x-mpegURL':
+ formats.extend(self._extract_m3u8_formats(
+ source_url, clip_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif mimetype == 'application/dash+xml':
+ formats.extend(self._extract_mpd_formats(
+ source_url, clip_id, mpd_id='dash', fatal=False))
+ else:
+ tbr = fix_bitrate(source['bitrate'])
+ if protocol in ('rtmp', 'rtmpe'):
+ mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
+ if not mobj:
+ continue
+ path = mobj.group('path')
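+ # e.g. (illustrative) a path of 'vod/mp4:clip_480.mp4' splits into
+ # app 'vod/' and play_path 'mp4:clip_480.mp4' for the RTMP URL below.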
+ mp4colon_index = path.rfind('mp4:')
+ app = path[:mp4colon_index]
+ play_path = path[mp4colon_index:]
+ formats.append({
+ 'url': '%s/%s' % (mobj.group('url'), app),
+ 'app': app,
+ 'play_path': play_path,
+ 'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
+ 'page_url': 'http://www.prosieben.de',
+ 'tbr': tbr,
+ 'ext': 'flv',
+ 'format_id': 'rtmp%s' % ('-%d' % tbr if tbr else ''),
+ })
+ else:
+ formats.append({
+ 'url': source_url,
+ 'tbr': tbr,
+ 'format_id': 'http%s' % ('-%d' % tbr if tbr else ''),
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'duration': float_or_none(video.get('duration')),
+ 'formats': formats,
+ }
+
+
+class ProSiebenSat1IE(ProSiebenSat1BaseIE):
+ IE_NAME = 'prosiebensat1'
+ IE_DESC = 'ProSiebenSat.1 Digital'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ (?:beta\.)?
+ (?:
+ prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
+ )\.(?:de|at|ch)|
+ ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
+ )
+ /(?P<id>.+)
+ '''
+
+ _TESTS = [
+ {
+ # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
+ # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
+ # - malformed f4m manifest support
+ # - proper handling of URLs starting with `https?://` in 2.0 manifests
+ # - recursive extraction of child f4m manifests
+ 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
+ 'info_dict': {
+ 'id': '2104602',
+ 'ext': 'mp4',
+ 'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
+ 'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
+ 'upload_date': '20131231',
+ 'duration': 5845.04,
+ 'series': 'CIRCUS HALLIGALLI',
+ 'season_number': 2,
+ 'episode': 'Episode 18 - Staffel 2',
+ 'episode_number': 18,
+ },
+ },
+ {
+ 'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
+ 'info_dict': {
+ 'id': '2570327',
+ 'ext': 'mp4',
+ 'title': 'Lady-Umstyling für Audrina',
+ 'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
+ 'upload_date': '20131014',
+ 'duration': 606.76,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'Seems to be broken',
+ },
+ {
+ 'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
+ 'info_dict': {
+ 'id': '2429369',
+ 'ext': 'mp4',
+ 'title': 'Countdown für die Autowerkstatt',
+ 'description': 'md5:809fc051a457b5d8666013bc40698817',
+ 'upload_date': '20140223',
+ 'duration': 2595.04,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
+ 'info_dict': {
+ 'id': '2904997',
+ 'ext': 'mp4',
+ 'title': 'Sexy laufen in Ugg Boots',
+ 'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
+ 'upload_date': '20140122',
+ 'duration': 245.32,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
+ 'info_dict': {
+ 'id': '2906572',
+ 'ext': 'mp4',
+ 'title': 'Im Interview: Kai Wiesinger',
+ 'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
+ 'upload_date': '20140203',
+ 'duration': 522.56,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
+ 'info_dict': {
+ 'id': '2992323',
+ 'ext': 'mp4',
+ 'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
+ 'description': 'md5:2669cde3febe9bce13904f701e774eb6',
+ 'upload_date': '20141014',
+ 'duration': 2410.44,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
+ 'info_dict': {
+ 'id': '3004256',
+ 'ext': 'mp4',
+ 'title': 'Schalke: Tönnies möchte Raul zurück',
+ 'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
+ 'upload_date': '20140226',
+ 'duration': 228.96,
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ 'skip': 'This video is unavailable',
+ },
+ {
+ 'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
+ 'info_dict': {
+ 'id': '2572814',
+ 'ext': 'mp4',
+ 'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
+ 'description': 'md5:6ddb02b0781c6adf778afea606652e38',
+ 'timestamp': 1382041620,
+ 'upload_date': '20131017',
+ 'duration': 469.88,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
+ 'info_dict': {
+ 'id': '2156342',
+ 'ext': 'mp4',
+ 'title': 'Kurztrips zum Valentinstag',
+ 'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
+ 'duration': 307.24,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
+ 'info_dict': {
+ 'id': '439664',
+ 'title': 'Episode 8 - Ganze Folge - Playlist',
+ 'description': 'md5:63b8963e71f481782aeea877658dec84',
+ },
+ 'playlist_count': 2,
+ 'skip': 'This video is unavailable',
+ },
+ {
+ # title in <h2 class="subtitle">
+ 'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
+ 'info_dict': {
+ 'id': '4895826',
+ 'ext': 'mp4',
+ 'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
+ 'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
+ 'upload_date': '20170302',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'geo restricted to Germany',
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to Germany
+ 'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
+ 'only_matching': True,
+ },
+ ]
+
+ _TOKEN = 'prosieben'
+ _SALT = '01!8d8F_)r9]4s[qeuXfP%'
+ _CLIENT_NAME = 'kolibri-2.0.19-splec4'
+
+ _ACCESS_ID = 'x_prosiebenmaxx-de'
+ _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
+ _IV = 'Aeluchoc6aevechuipiexeeboowedaok'
+
+ _CLIPID_REGEXES = [
+ r'"clip_id"\s*:\s+"(\d+)"',
+ r'clipid: "(\d+)"',
+ r'clip[iI]d=(\d+)',
+ r'clip[iI][dD]\s*=\s*["\'](\d+)',
+ r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
+ r'proMamsId&quot;\s*:\s*&quot;(\d+)',
+ r'proMamsId"\s*:\s*"(\d+)',
+ ]
+ _TITLE_REGEXES = [
+ r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
+ r'<header class="clearfix">\s*<h3>(.+?)</h3>',
+ r'<!-- start video -->\s*<h1>(.+?)</h1>',
+ r'<h1 class="att-name">\s*(.+?)</h1>',
+ r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
+ r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
+ r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
+ r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
+ ]
+ _DESCRIPTION_REGEXES = [
+ r'<p itemprop="description">\s*(.+?)</p>',
+ r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
+ r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
+ r'<p class="att-description">\s*(.+?)\s*</p>',
+ r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
+ r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
+ ]
+ _UPLOAD_DATE_REGEXES = [
+ r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
+ r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
+ r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
+ r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
+ ]
+ _PAGE_TYPE_REGEXES = [
+ r'<meta name="page_type" content="([^"]+)">',
+ r"'itemType'\s*:\s*'([^']*)'",
+ ]
+ _PLAYLIST_ID_REGEXES = [
+ r'content[iI]d=(\d+)',
+ r"'itemId'\s*:\s*'([^']*)'",
+ ]
+ _PLAYLIST_CLIP_REGEXES = [
+ r'(?s)data-qvt=.+?<a href="([^"]+)"',
+ ]
+
+ def _extract_clip(self, url, webpage):
+ clip_id = self._html_search_regex(
+ self._CLIPID_REGEXES, webpage, 'clip id')
+ title = self._html_search_regex(
+ self._TITLE_REGEXES, webpage, 'title',
+ default=None) or self._og_search_title(webpage)
+ info = self._extract_video_info(url, clip_id)
+ description = self._html_search_regex(
+ self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
+ if description is None:
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+ upload_date = unified_strdate(
+ self._html_search_meta('og:published_time', webpage,
+ 'upload date', default=None)
+ or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
+ webpage, 'upload date', default=None))
+
+ json_ld = self._search_json_ld(webpage, clip_id, default={})
+
+ return merge_dicts(info, {
+ 'id': clip_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ }, json_ld)
+
+ def _extract_playlist(self, url, webpage):
+ playlist_id = self._html_search_regex(
+ self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
+ playlist = self._parse_json(
+ self._search_regex(
+ r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
+ webpage, 'playlist'),
+ playlist_id)
+ entries = []
+ for item in playlist:
+ clip_id = item.get('id') or item.get('upc')
+ if not clip_id:
+ continue
+ info = self._extract_video_info(url, clip_id)
+ info.update({
+ 'id': clip_id,
+ 'title': item.get('title') or item.get('teaser', {}).get('headline'),
+ 'description': item.get('teaser', {}).get('description'),
+ 'thumbnail': item.get('poster'),
+ 'duration': float_or_none(item.get('duration')),
+ 'series': item.get('tvShowTitle'),
+ 'uploader': item.get('broadcastPublisher'),
+ })
+ entries.append(info)
+ return self.playlist_result(entries, playlist_id)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ page_type = self._search_regex(
+ self._PAGE_TYPE_REGEXES, webpage,
+ 'page type', default='clip').lower()
+ if page_type == 'clip':
+ return self._extract_clip(url, webpage)
+ elif page_type == 'playlist':
+ return self._extract_playlist(url, webpage)
+ else:
+ raise ExtractorError(
+ 'Unsupported page type %s' % page_type, expected=True)
diff --git a/youtube_dlc/extractor/puhutv.py b/youtube_dlc/extractor/puhutv.py
new file mode 100644
index 000000000..ca71665e0
--- /dev/null
+++ b/youtube_dlc/extractor/puhutv.py
@@ -0,0 +1,239 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ parse_resolution,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ url_or_none,
+ urljoin,
+)
+
+
+class PuhuTVIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
+ IE_NAME = 'puhutv'
+ _TESTS = [{
+ # film
+ 'url': 'https://puhutv.com/sut-kardesler-izle',
+ 'md5': 'a347470371d56e1585d1b2c8dab01c96',
+ 'info_dict': {
+ 'id': '5085',
+ 'display_id': 'sut-kardesler',
+ 'ext': 'mp4',
+ 'title': 'Süt Kardeşler',
+ 'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 4832.44,
+ 'creator': 'Arzu Film',
+ 'timestamp': 1561062602,
+ 'upload_date': '20190620',
+ 'release_year': 1976,
+ 'view_count': int,
+ 'tags': list,
+ },
+ }, {
+ # episode, geo restricted, bypassable with --geo-verification-proxy
+ 'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
+ 'only_matching': True,
+ }, {
+ # 4k, with subtitles
+ 'url': 'https://puhutv.com/dip-1-bolum-izle',
+ 'only_matching': True,
+ }]
+ _SUBTITLE_LANGS = {
+ 'English': 'en',
+ 'Deutsch': 'de',
+ 'عربى': 'ar'
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ info = self._download_json(
+ urljoin(url, '/api/slug/%s-izle' % display_id),
+ display_id)['data']
+
+ video_id = compat_str(info['id'])
+ show = info.get('title') or {}
+ title = info.get('name') or show['name']
+ if info.get('display_name'):
+ title = '%s %s' % (title, info['display_name'])
+
+ try:
+ videos = self._download_json(
+ 'https://puhutv.com/api/assets/%s/videos' % video_id,
+ display_id, 'Downloading video JSON',
+ headers=self.geo_verification_headers())
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self.raise_geo_restricted()
+ raise
+
+ urls = []
+ formats = []
+
+ for video in videos['data']['videos']:
+ media_url = url_or_none(video.get('url'))
+ if not media_url or media_url in urls:
+ continue
+ urls.append(media_url)
+
+ playlist = video.get('is_playlist')
+ if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
+ formats.extend(self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+
+ quality = int_or_none(video.get('quality'))
+ f = {
+ 'url': media_url,
+ 'ext': 'mp4',
+ 'height': quality
+ }
+ video_format = video.get('video_format')
+ is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
+ if is_hls:
+ format_id = 'hls'
+ f['protocol'] = 'm3u8_native'
+ elif video_format == 'mp4':
+ format_id = 'http'
+ else:
+ continue
+ if quality:
+ format_id += '-%sp' % quality
+ f['format_id'] = format_id
+ formats.append(f)
+ self._sort_formats(formats)
+
+ creator = try_get(
+ show, lambda x: x['producer']['name'], compat_str)
+
+ content = info.get('content') or {}
+
+ images = try_get(
+ content, lambda x: x['images']['wide'], dict) or {}
+ thumbnails = []
+ for image_id, image_url in images.items():
+ if not isinstance(image_url, compat_str):
+ continue
+ if not image_url.startswith(('http', '//')):
+ image_url = 'https://%s' % image_url
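+ # parse_resolution() picks dimensions out of the key name, so a
+ # (hypothetical) image_id like 'main_1920x1080' yields
+ # {'width': 1920, 'height': 1080} for the thumbnail entry.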
+ t = parse_resolution(image_id)
+ t.update({
+ 'id': image_id,
+ 'url': image_url
+ })
+ thumbnails.append(t)
+
+ tags = []
+ for genre in show.get('genres') or []:
+ if not isinstance(genre, dict):
+ continue
+ genre_name = genre.get('name')
+ if genre_name and isinstance(genre_name, compat_str):
+ tags.append(genre_name)
+
+ subtitles = {}
+ for subtitle in content.get('subtitles') or []:
+ if not isinstance(subtitle, dict):
+ continue
+ lang = subtitle.get('language')
+ sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
+ if not lang or not isinstance(lang, compat_str) or not sub_url:
+ continue
+ subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
+ 'url': sub_url
+ }]
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': info.get('description') or show.get('description'),
+ 'season_id': str_or_none(info.get('season_id')),
+ 'season_number': int_or_none(info.get('season_number')),
+ 'episode_number': int_or_none(info.get('episode_number')),
+ 'release_year': int_or_none(show.get('released_at')),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'creator': creator,
+ 'view_count': int_or_none(content.get('watch_count')),
+ 'duration': float_or_none(content.get('duration_in_ms'), 1000),
+ 'tags': tags,
+ 'subtitles': subtitles,
+ 'thumbnails': thumbnails,
+ 'formats': formats
+ }
+
+
+class PuhuTVSerieIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
+ IE_NAME = 'puhutv:serie'
+ _TESTS = [{
+ 'url': 'https://puhutv.com/deniz-yildizi-detay',
+ 'info_dict': {
+ 'title': 'Deniz Yıldızı',
+ 'id': 'deniz-yildizi',
+ },
+ 'playlist_mincount': 205,
+ }, {
+ # a film detail page, which uses the same URL pattern as a series page
+ 'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
+ 'only_matching': True,
+ }]
+
+ def _extract_entries(self, seasons):
+ for season in seasons:
+ season_id = season.get('id')
+ if not season_id:
+ continue
+ page = 1
+ has_more = True
+ while has_more is True:
+ season = self._download_json(
+ 'https://galadriel.puhutv.com/seasons/%s' % season_id,
+ season_id, 'Downloading page %s' % page, query={
+ 'page': page,
+ 'per': 40,
+ })
+ episodes = season.get('episodes')
+ if isinstance(episodes, list):
+ for ep in episodes:
+ slug_path = str_or_none(ep.get('slugPath'))
+ if not slug_path:
+ continue
+ video_id = str_or_none(int_or_none(ep.get('id')))
+ yield self.url_result(
+ 'https://puhutv.com/%s' % slug_path,
+ ie=PuhuTVIE.ie_key(), video_id=video_id,
+ video_title=ep.get('name') or ep.get('eventLabel'))
+ page += 1
+ has_more = season.get('hasMore')
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ info = self._download_json(
+ urljoin(url, '/api/slug/%s-detay' % playlist_id),
+ playlist_id)['data']
+
+ seasons = info.get('seasons')
+ if seasons:
+ return self.playlist_result(
+ self._extract_entries(seasons), playlist_id, info.get('name'))
+
+ # For films, these are using same url with series
+ video_id = info.get('slug') or info['assets'][0]['slug']
+ return self.url_result(
+ 'https://puhutv.com/%s-izle' % video_id,
+ PuhuTVIE.ie_key(), video_id)
diff --git a/youtube_dl/extractor/puls4.py b/youtube_dlc/extractor/puls4.py
index 80091b85f..80091b85f 100644
--- a/youtube_dl/extractor/puls4.py
+++ b/youtube_dlc/extractor/puls4.py
diff --git a/youtube_dl/extractor/pyvideo.py b/youtube_dlc/extractor/pyvideo.py
index b8ac93a62..b8ac93a62 100644
--- a/youtube_dl/extractor/pyvideo.py
+++ b/youtube_dlc/extractor/pyvideo.py
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dlc/extractor/qqmusic.py
index 084308aeb..084308aeb 100644
--- a/youtube_dl/extractor/qqmusic.py
+++ b/youtube_dlc/extractor/qqmusic.py
diff --git a/youtube_dl/extractor/r7.py b/youtube_dlc/extractor/r7.py
index e2202d603..e2202d603 100644
--- a/youtube_dl/extractor/r7.py
+++ b/youtube_dlc/extractor/r7.py
diff --git a/youtube_dl/extractor/radiobremen.py b/youtube_dlc/extractor/radiobremen.py
index 2c35f9845..2c35f9845 100644
--- a/youtube_dl/extractor/radiobremen.py
+++ b/youtube_dlc/extractor/radiobremen.py
diff --git a/youtube_dl/extractor/radiocanada.py b/youtube_dlc/extractor/radiocanada.py
index a28b1a24c..a28b1a24c 100644
--- a/youtube_dl/extractor/radiocanada.py
+++ b/youtube_dlc/extractor/radiocanada.py
diff --git a/youtube_dl/extractor/radiode.py b/youtube_dlc/extractor/radiode.py
index 2c06c8b1e..2c06c8b1e 100644
--- a/youtube_dl/extractor/radiode.py
+++ b/youtube_dlc/extractor/radiode.py
diff --git a/youtube_dl/extractor/radiofrance.py b/youtube_dlc/extractor/radiofrance.py
index a8afc0014..a8afc0014 100644
--- a/youtube_dl/extractor/radiofrance.py
+++ b/youtube_dlc/extractor/radiofrance.py
diff --git a/youtube_dl/extractor/radiojavan.py b/youtube_dlc/extractor/radiojavan.py
index 3f74f0c01..3f74f0c01 100644
--- a/youtube_dl/extractor/radiojavan.py
+++ b/youtube_dlc/extractor/radiojavan.py
diff --git a/youtube_dl/extractor/rai.py b/youtube_dlc/extractor/rai.py
index 207a6c247..207a6c247 100644
--- a/youtube_dl/extractor/rai.py
+++ b/youtube_dlc/extractor/rai.py
diff --git a/youtube_dl/extractor/raywenderlich.py b/youtube_dlc/extractor/raywenderlich.py
index 5411ece21..5411ece21 100644
--- a/youtube_dl/extractor/raywenderlich.py
+++ b/youtube_dlc/extractor/raywenderlich.py
diff --git a/youtube_dl/extractor/rbmaradio.py b/youtube_dlc/extractor/rbmaradio.py
index ae7413fb5..ae7413fb5 100644
--- a/youtube_dl/extractor/rbmaradio.py
+++ b/youtube_dlc/extractor/rbmaradio.py
diff --git a/youtube_dl/extractor/rds.py b/youtube_dlc/extractor/rds.py
index 8c016a77d..8c016a77d 100644
--- a/youtube_dl/extractor/rds.py
+++ b/youtube_dlc/extractor/rds.py
diff --git a/youtube_dl/extractor/redbulltv.py b/youtube_dlc/extractor/redbulltv.py
index dbe1aaded..dbe1aaded 100644
--- a/youtube_dl/extractor/redbulltv.py
+++ b/youtube_dlc/extractor/redbulltv.py
diff --git a/youtube_dl/extractor/reddit.py b/youtube_dlc/extractor/reddit.py
index 663f622b3..663f622b3 100644
--- a/youtube_dl/extractor/reddit.py
+++ b/youtube_dlc/extractor/reddit.py
diff --git a/youtube_dlc/extractor/redtube.py b/youtube_dlc/extractor/redtube.py
new file mode 100644
index 000000000..2d2f6a98c
--- /dev/null
+++ b/youtube_dlc/extractor/redtube.py
@@ -0,0 +1,133 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ merge_dicts,
+ str_to_int,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class RedTubeIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?redtube\.com/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.redtube.com/66418',
+ 'md5': 'fc08071233725f26b8f014dba9590005',
+ 'info_dict': {
+ 'id': '66418',
+ 'ext': 'mp4',
+ 'title': 'Sucked on a toilet',
+ 'upload_date': '20110811',
+ 'duration': 596,
+ 'view_count': int,
+ 'age_limit': 18,
+ }
+ }, {
+ 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ 'http://www.redtube.com/%s' % video_id, video_id)
+
+ ERRORS = (
+ (('video-deleted-info', '>This video has been removed'), 'has been removed'),
+ (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'),
+ )
+
+ for patterns, message in ERRORS:
+ if any(p in webpage for p in patterns):
+ raise ExtractorError(
+ 'Video %s %s' % (video_id, message), expected=True)
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ if not info.get('title'):
+ info['title'] = self._html_search_regex(
+ (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>',
+ r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1',),
+ webpage, 'title', group='title',
+ default=None) or self._og_search_title(webpage)
+
+ formats = []
+ sources = self._parse_json(
+ self._search_regex(
+ r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'),
+ video_id, fatal=False)
+ if sources and isinstance(sources, dict):
+ for format_id, format_url in sources.items():
+ if format_url:
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id),
+ })
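+ # Some pages expose a mediaDefinition list instead of the sources map;
+ # its entries may point at progressive MP4s or at an HLS master
+ # playlist, both handled below.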
+ medias = self._parse_json(
+ self._search_regex(
+ r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage,
+ 'media definitions', default='{}'),
+ video_id, fatal=False)
+ if medias and isinstance(medias, list):
+ for media in medias:
+ format_url = url_or_none(media.get('videoUrl'))
+ if not format_url:
+ continue
+ if media.get('format') == 'hls' or determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ continue
+ format_id = media.get('quality')
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id),
+ })
+ if not formats:
+ video_url = self._html_search_regex(
+ r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL')
+ formats.append({'url': video_url})
+ self._sort_formats(formats)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+ upload_date = unified_strdate(self._search_regex(
+ r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<',
+ webpage, 'upload date', default=None))
+ duration = int_or_none(self._og_search_property(
+ 'video:duration', webpage, default=None) or self._search_regex(
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None))
+ view_count = str_to_int(self._search_regex(
+ (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)',
+ r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)',
+ r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'),
+ webpage, 'view count', default=None))
+
+ # No self-labeling, but they describe themselves as
+ # "Home of Videos Porno"
+ age_limit = 18
+
+ return merge_dicts(info, {
+ 'id': video_id,
+ 'ext': 'mp4',
+ 'thumbnail': thumbnail,
+ 'upload_date': upload_date,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ })
diff --git a/youtube_dl/extractor/regiotv.py b/youtube_dlc/extractor/regiotv.py
index e250a52f0..e250a52f0 100644
--- a/youtube_dl/extractor/regiotv.py
+++ b/youtube_dlc/extractor/regiotv.py
diff --git a/youtube_dl/extractor/rentv.py b/youtube_dlc/extractor/rentv.py
index 7c8909d95..7c8909d95 100644
--- a/youtube_dl/extractor/rentv.py
+++ b/youtube_dlc/extractor/rentv.py
diff --git a/youtube_dl/extractor/restudy.py b/youtube_dlc/extractor/restudy.py
index d47fb45ca..d47fb45ca 100644
--- a/youtube_dl/extractor/restudy.py
+++ b/youtube_dlc/extractor/restudy.py
diff --git a/youtube_dl/extractor/reuters.py b/youtube_dlc/extractor/reuters.py
index 9dc482d21..9dc482d21 100644
--- a/youtube_dl/extractor/reuters.py
+++ b/youtube_dlc/extractor/reuters.py
diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dlc/extractor/reverbnation.py
index 4cb99c244..4cb99c244 100644
--- a/youtube_dl/extractor/reverbnation.py
+++ b/youtube_dlc/extractor/reverbnation.py
diff --git a/youtube_dl/extractor/rice.py b/youtube_dlc/extractor/rice.py
index f855719ac..f855719ac 100644
--- a/youtube_dl/extractor/rice.py
+++ b/youtube_dlc/extractor/rice.py
diff --git a/youtube_dl/extractor/rmcdecouverte.py b/youtube_dlc/extractor/rmcdecouverte.py
index c3623edcc..c3623edcc 100644
--- a/youtube_dl/extractor/rmcdecouverte.py
+++ b/youtube_dlc/extractor/rmcdecouverte.py
diff --git a/youtube_dl/extractor/ro220.py b/youtube_dlc/extractor/ro220.py
index 69934ef2b..69934ef2b 100644
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dlc/extractor/ro220.py
diff --git a/youtube_dl/extractor/rockstargames.py b/youtube_dlc/extractor/rockstargames.py
index cd6904bc9..cd6904bc9 100644
--- a/youtube_dl/extractor/rockstargames.py
+++ b/youtube_dlc/extractor/rockstargames.py
diff --git a/youtube_dlc/extractor/roosterteeth.py b/youtube_dlc/extractor/roosterteeth.py
new file mode 100644
index 000000000..8883639b2
--- /dev/null
+++ b/youtube_dlc/extractor/roosterteeth.py
@@ -0,0 +1,137 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ str_or_none,
+ urlencode_postdata,
+)
+
+
+class RoosterTeethIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
+ _NETRC_MACHINE = 'roosterteeth'
+ _TESTS = [{
+ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'md5': 'e2bd7764732d785ef797700a2489f212',
+ 'info_dict': {
+ 'id': '9156',
+ 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'ext': 'mp4',
+ 'title': 'Million Dollars, But... The Game Announcement',
+ 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5',
+ 'thumbnail': r're:^https?://.*\.png$',
+ 'series': 'Million Dollars, But...',
+ 'episode': 'Million Dollars, But... The Game Announcement',
+ },
+ }, {
+ 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
+ 'only_matching': True,
+ }, {
+ # only available for FIRST members
+ 'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+ 'only_matching': True,
+ }]
+ _EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/episodes/'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ try:
+ self._download_json(
+ 'https://auth.roosterteeth.com/oauth/token',
+ None, 'Logging in', data=urlencode_postdata({
+ 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
+ 'grant_type': 'password',
+ 'username': username,
+ 'password': password,
+ }))
+ except ExtractorError as e:
+ msg = 'Unable to login'
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ resp = self._parse_json(e.cause.read().decode(), None, fatal=False)
+ if resp:
+ error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
+ if error:
+ msg += ': ' + error
+ self.report_warning(msg)
+
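+ # Login is skipped when an rt_access_token cookie for the API host is
+ # already present (e.g. imported via --cookies).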
+ def _real_initialize(self):
+ if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'):
+ return
+ self._login()
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ api_episode_url = self._EPISODE_BASE_URL + display_id
+
+ try:
+ m3u8_url = self._download_json(
+ api_episode_url + '/videos', display_id,
+ 'Downloading video JSON metadata')['data'][0]['attributes']['url']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ if self._parse_json(e.cause.read().decode(), display_id).get('access') is False:
+ self.raise_login_required(
+ '%s is only available for FIRST members' % display_id)
+ raise
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ episode = self._download_json(
+ api_episode_url, display_id,
+ 'Downloading episode JSON metadata')['data'][0]
+ attributes = episode['attributes']
+ title = attributes.get('title') or attributes['display_title']
+ video_id = compat_str(episode['id'])
+
+ thumbnails = []
+ for image in episode.get('included', {}).get('images', []):
+ if image.get('type') == 'episode_image':
+ img_attributes = image.get('attributes') or {}
+ for k in ('thumb', 'small', 'medium', 'large'):
+ img_url = img_attributes.get(k)
+ if img_url:
+ thumbnails.append({
+ 'id': k,
+ 'url': img_url,
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': attributes.get('description') or attributes.get('caption'),
+ 'thumbnails': thumbnails,
+ 'series': attributes.get('show_title'),
+ 'season_number': int_or_none(attributes.get('season_number')),
+ 'season_id': attributes.get('season_id'),
+ 'episode': title,
+ 'episode_number': int_or_none(attributes.get('number')),
+ 'episode_id': str_or_none(episode.get('uuid')),
+ 'formats': formats,
+ 'channel_id': attributes.get('channel_id'),
+ 'duration': int_or_none(attributes.get('length')),
+ }
diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dlc/extractor/rottentomatoes.py
index 14c8e8236..14c8e8236 100644
--- a/youtube_dl/extractor/rottentomatoes.py
+++ b/youtube_dlc/extractor/rottentomatoes.py
diff --git a/youtube_dl/extractor/roxwel.py b/youtube_dlc/extractor/roxwel.py
index 65284643b..65284643b 100644
--- a/youtube_dl/extractor/roxwel.py
+++ b/youtube_dlc/extractor/roxwel.py
diff --git a/youtube_dl/extractor/rozhlas.py b/youtube_dlc/extractor/rozhlas.py
index fccf69401..fccf69401 100644
--- a/youtube_dl/extractor/rozhlas.py
+++ b/youtube_dlc/extractor/rozhlas.py
diff --git a/youtube_dl/extractor/rtbf.py b/youtube_dlc/extractor/rtbf.py
index 3b0f3080b..3b0f3080b 100644
--- a/youtube_dl/extractor/rtbf.py
+++ b/youtube_dlc/extractor/rtbf.py
diff --git a/youtube_dl/extractor/rte.py b/youtube_dlc/extractor/rte.py
index 1fbc72915..1fbc72915 100644
--- a/youtube_dl/extractor/rte.py
+++ b/youtube_dlc/extractor/rte.py
diff --git a/youtube_dl/extractor/rtl2.py b/youtube_dlc/extractor/rtl2.py
index 70f000ca8..70f000ca8 100644
--- a/youtube_dl/extractor/rtl2.py
+++ b/youtube_dlc/extractor/rtl2.py
diff --git a/youtube_dl/extractor/rtlnl.py b/youtube_dlc/extractor/rtlnl.py
index fadca8c17..fadca8c17 100644
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dlc/extractor/rtlnl.py
diff --git a/youtube_dl/extractor/rtp.py b/youtube_dlc/extractor/rtp.py
index 02986f442..02986f442 100644
--- a/youtube_dl/extractor/rtp.py
+++ b/youtube_dlc/extractor/rtp.py
diff --git a/youtube_dl/extractor/rts.py b/youtube_dlc/extractor/rts.py
index 48f17b828..48f17b828 100644
--- a/youtube_dl/extractor/rts.py
+++ b/youtube_dlc/extractor/rts.py
diff --git a/youtube_dl/extractor/rtve.py b/youtube_dlc/extractor/rtve.py
index ce9db0629..ce9db0629 100644
--- a/youtube_dl/extractor/rtve.py
+++ b/youtube_dlc/extractor/rtve.py
diff --git a/youtube_dl/extractor/rtvnh.py b/youtube_dlc/extractor/rtvnh.py
index 6a00f7007..6a00f7007 100644
--- a/youtube_dl/extractor/rtvnh.py
+++ b/youtube_dlc/extractor/rtvnh.py
diff --git a/youtube_dl/extractor/rtvs.py b/youtube_dlc/extractor/rtvs.py
index 6573b260d..6573b260d 100644
--- a/youtube_dl/extractor/rtvs.py
+++ b/youtube_dlc/extractor/rtvs.py
diff --git a/youtube_dl/extractor/ruhd.py b/youtube_dlc/extractor/ruhd.py
index 3c8053a26..3c8053a26 100644
--- a/youtube_dl/extractor/ruhd.py
+++ b/youtube_dlc/extractor/ruhd.py
diff --git a/youtube_dl/extractor/rutube.py b/youtube_dlc/extractor/rutube.py
index 8f54d5675..8f54d5675 100644
--- a/youtube_dl/extractor/rutube.py
+++ b/youtube_dlc/extractor/rutube.py
diff --git a/youtube_dl/extractor/rutv.py b/youtube_dlc/extractor/rutv.py
index d2713c19a..d2713c19a 100644
--- a/youtube_dl/extractor/rutv.py
+++ b/youtube_dlc/extractor/rutv.py
diff --git a/youtube_dl/extractor/ruutu.py b/youtube_dlc/extractor/ruutu.py
index f984040aa..f984040aa 100644
--- a/youtube_dl/extractor/ruutu.py
+++ b/youtube_dlc/extractor/ruutu.py
diff --git a/youtube_dl/extractor/ruv.py b/youtube_dlc/extractor/ruv.py
index 8f3cc4095..8f3cc4095 100644
--- a/youtube_dl/extractor/ruv.py
+++ b/youtube_dlc/extractor/ruv.py
diff --git a/youtube_dlc/extractor/safari.py b/youtube_dlc/extractor/safari.py
new file mode 100644
index 000000000..2cc665122
--- /dev/null
+++ b/youtube_dlc/extractor/safari.py
@@ -0,0 +1,268 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+
+from ..compat import (
+ compat_parse_qs,
+ compat_urlparse,
+)
+from ..utils import (
+ ExtractorError,
+ update_url_query,
+)
+
+
+class SafariBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/'
+ _NETRC_MACHINE = 'safari'
+
+ _API_BASE = 'https://learning.oreilly.com/api/v1'
+ _API_FORMAT = 'json'
+
+ LOGGED_IN = False
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ _, urlh = self._download_webpage_handle(
+ 'https://learning.oreilly.com/accounts/login-check/', None,
+ 'Downloading login page')
+
+ def is_logged(urlh):
+ return 'learning.oreilly.com/home/' in urlh.geturl()
+
+ if is_logged(urlh):
+ self.LOGGED_IN = True
+ return
+
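+        # Not logged in yet: the login-check redirect carries the post-login
+        # destination in its "next" query parameter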
+ redirect_url = urlh.geturl()
+ parsed_url = compat_urlparse.urlparse(redirect_url)
+ qs = compat_parse_qs(parsed_url.query)
+ next_uri = compat_urlparse.urljoin(
+ 'https://api.oreilly.com', qs['next'][0])
+
+ auth, urlh = self._download_json_handle(
+ 'https://www.oreilly.com/member/auth/login/', None, 'Logging in',
+ data=json.dumps({
+ 'email': username,
+ 'password': password,
+ 'redirect_uri': next_uri,
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ 'Referer': redirect_url,
+ }, expected_status=400)
+
+ credentials = auth.get('credentials')
+ if (not auth.get('logged_in') and not auth.get('redirect_uri')
+ and credentials):
+ raise ExtractorError(
+                'Unable to log in: %s' % credentials, expected=True)
+
+        # O'Reilly serves two identical instances of the following cookies in the
+        # Set-Cookie header and expects the first one to actually be set
+ for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'):
+ self._apply_first_set_cookie_header(urlh, cookie)
+
+ _, urlh = self._download_webpage_handle(
+            auth.get('redirect_uri') or next_uri, None, 'Completing login')
+
+ if is_logged(urlh):
+ self.LOGGED_IN = True
+ return
+
+ raise ExtractorError('Unable to log in')
+
+
+class SafariIE(SafariBaseIE):
+ IE_NAME = 'safari'
+ IE_DESC = 'safaribooksonline.com online video'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
+ (?:
+ library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html|
+ videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+)
+ )
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
+ 'md5': 'dcc5a425e79f2564148652616af1f2a3',
+ 'info_dict': {
+ 'id': '0_qbqx90ic',
+ 'ext': 'mp4',
+ 'title': 'Introduction to Hadoop Fundamentals LiveLessons',
+ 'timestamp': 1437758058,
+ 'upload_date': '20150724',
+ 'uploader_id': 'stork',
+ },
+ }, {
+ # non-digits in course id
+ 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html',
+ 'only_matching': True,
+ }]
+
+ _PARTNER_ID = '1926081'
+ _UICONF_ID = '29375172'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ reference_id = mobj.group('reference_id')
+ if reference_id:
+ video_id = reference_id
+ partner_id = self._PARTNER_ID
+ ui_id = self._UICONF_ID
+ else:
+ video_id = '%s-%s' % (mobj.group('course_id'), mobj.group('part'))
+
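+            # Library-style URLs carry no Kaltura ids; fetch the page (which
+            # may redirect to the /videos/ schema) and scrape them instead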
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ mobj = re.match(self._VALID_URL, urlh.geturl())
+ reference_id = mobj.group('reference_id')
+ if not reference_id:
+ reference_id = self._search_regex(
+ r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura reference id', group='id')
+ partner_id = self._search_regex(
+ r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura widget id', default=self._PARTNER_ID,
+ group='id')
+ ui_id = self._search_regex(
+ r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ webpage, 'kaltura uiconf id', default=self._UICONF_ID,
+ group='id')
+
+ query = {
+ 'wid': '_%s' % partner_id,
+ 'uiconf_id': ui_id,
+ 'flashvars[referenceId]': reference_id,
+ }
+
+ if self.LOGGED_IN:
+ kaltura_session = self._download_json(
+ '%s/player/kaltura_session/?reference_id=%s' % (self._API_BASE, reference_id),
+ video_id, 'Downloading kaltura session JSON',
+ 'Unable to download kaltura session JSON', fatal=False,
+ headers={'Accept': 'application/json'})
+ if kaltura_session:
+ session = kaltura_session.get('session')
+ if session:
+ query['flashvars[ks]'] = session
+
+ return self.url_result(update_url_query(
+ 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query),
+ 'Kaltura')
+
+
+class SafariApiIE(SafariBaseIE):
+ IE_NAME = 'safari:api'
+ _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ part = self._download_json(
+ url, '%s/%s' % (mobj.group('course_id'), mobj.group('part')),
+ 'Downloading part JSON')
+ return self.url_result(part['web_url'], SafariIE.ie_key())
+
+
+class SafariCourseIE(SafariBaseIE):
+ IE_NAME = 'safari:course'
+ IE_DESC = 'safaribooksonline.com online courses'
+
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/
+ (?:
+ library/view/[^/]+|
+ api/v1/book|
+ videos/[^/]+
+ )|
+ techbus\.safaribooksonline\.com
+ )
+ /(?P<id>[^/]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'info_dict': {
+ 'id': '9780133392838',
+ 'title': 'Hadoop Fundamentals LiveLessons',
+ },
+ 'playlist_count': 22,
+ 'skip': 'Requires safaribooksonline account credentials',
+ }, {
+ 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://techbus.safaribooksonline.com/9780134426365',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url)
+ else super(SafariCourseIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ course_json = self._download_json(
+ '%s/book/%s/?override_format=%s' % (self._API_BASE, course_id, self._API_FORMAT),
+ course_id, 'Downloading course JSON')
+
+ if 'chapters' not in course_json:
+ raise ExtractorError(
+ 'No chapters found for course %s' % course_id, expected=True)
+
+ entries = [
+ self.url_result(chapter, SafariApiIE.ie_key())
+ for chapter in course_json['chapters']]
+
+ course_title = course_json['title']
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/sapo.py b/youtube_dlc/extractor/sapo.py
index 49a9b313a..49a9b313a 100644
--- a/youtube_dl/extractor/sapo.py
+++ b/youtube_dlc/extractor/sapo.py
diff --git a/youtube_dl/extractor/savefrom.py b/youtube_dlc/extractor/savefrom.py
index 21e44b69a..21e44b69a 100644
--- a/youtube_dl/extractor/savefrom.py
+++ b/youtube_dlc/extractor/savefrom.py
diff --git a/youtube_dl/extractor/sbs.py b/youtube_dlc/extractor/sbs.py
index 0e623ff7b..0e623ff7b 100644
--- a/youtube_dl/extractor/sbs.py
+++ b/youtube_dlc/extractor/sbs.py
diff --git a/youtube_dl/extractor/screencast.py b/youtube_dlc/extractor/screencast.py
index 69a0d01f3..69a0d01f3 100644
--- a/youtube_dl/extractor/screencast.py
+++ b/youtube_dlc/extractor/screencast.py
diff --git a/youtube_dl/extractor/screencastomatic.py b/youtube_dlc/extractor/screencastomatic.py
index b5e76c9af..b5e76c9af 100644
--- a/youtube_dl/extractor/screencastomatic.py
+++ b/youtube_dlc/extractor/screencastomatic.py
diff --git a/youtube_dlc/extractor/scrippsnetworks.py b/youtube_dlc/extractor/scrippsnetworks.py
new file mode 100644
index 000000000..b40b4c4af
--- /dev/null
+++ b/youtube_dlc/extractor/scrippsnetworks.py
@@ -0,0 +1,155 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import hashlib
+import re
+
+from .aws import AWSIE
+from .anvato import AnvatoIE
+from .common import InfoExtractor
+from ..utils import (
+ smuggle_url,
+ urlencode_postdata,
+ xpath_text,
+)
+
+
+class ScrippsNetworksWatchIE(AWSIE):
+ IE_NAME = 'scrippsnetworks:watch'
+ _VALID_URL = r'''(?x)
+ https?://
+ watch\.
+ (?P<site>geniuskitchen)\.com/
+ (?:
+ player\.[A-Z0-9]+\.html\#|
+ show/(?:[^/]+/){2}|
+ player/
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
+ 'info_dict': {
+ 'id': '4194875',
+ 'ext': 'mp4',
+ 'title': 'Ample Hills Ice Cream Bike',
+ 'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
+ 'uploader': 'ANV',
+ 'upload_date': '20171011',
+ 'timestamp': 1507698000,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [AnvatoIE.ie_key()],
+ }]
+
+ _SNI_TABLE = {
+ 'geniuskitchen': 'genius',
+ }
+
+ _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
+ _AWS_PROXY_HOST = 'web.api.video.snidigital.com'
+
+ _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site_id, video_id = mobj.group('site', 'id')
+
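+        # Anonymous AWS Cognito flow: obtain an OpenId token for a fixed
+        # identity, trade it for temporary STS credentials via
+        # AssumeRoleWithWebIdentity, then query the SNI video API with them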
+ aws_identity_id_json = json.dumps({
+ 'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
+ }).encode('utf-8')
+ token = self._download_json(
+ 'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
+ data=aws_identity_id_json,
+ headers={
+ 'Accept': '*/*',
+ 'Content-Type': 'application/x-amz-json-1.1',
+ 'Referer': url,
+ 'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
+ 'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ })['Token']
+
+ sts = self._download_xml(
+ 'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
+ 'Action': 'AssumeRoleWithWebIdentity',
+ 'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
+ 'RoleSessionName': 'web-identity',
+ 'Version': '2011-06-15',
+ 'WebIdentityToken': token,
+ }), headers={
+ 'Referer': url,
+ 'X-Amz-User-Agent': self._AWS_USER_AGENT,
+ 'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+ })
+
+ def get(key):
+ return xpath_text(
+ sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
+ fatal=True)
+
+ mcp_id = self._aws_execute_api({
+ 'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
+ 'access_key': get('AccessKeyId'),
+ 'secret_key': get('SecretAccessKey'),
+ 'session_token': get('SessionToken'),
+ }, video_id)['results'][0]['mcpId']
+
+ return self.url_result(
+ smuggle_url(
+ 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+ {'geo_countries': ['US']}),
+ AnvatoIE.ie_key(), video_id=mcp_id)
+
+
+class ScrippsNetworksIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
+ 'info_dict': {
+ 'id': '0260338',
+ 'ext': 'mp4',
+ 'title': 'The Best of the Best',
+ 'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
+ 'timestamp': 1475678834,
+ 'upload_date': '20161005',
+ 'uploader': 'SCNI-SCND',
+ },
+ 'add_ie': ['ThePlatform'],
+ }, {
+ 'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
+ 'only_matching': True,
+ }]
+ _ACCOUNT_MAP = {
+ 'cookingchanneltv': 2433005105,
+ 'discovery': 2706091867,
+ 'diynetwork': 2433004575,
+ 'foodnetwork': 2433005105,
+ 'hgtv': 2433004575,
+ 'travelchannel': 2433005739,
+ }
+ _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'
+
+ def _real_extract(self, url):
+ site, guid = re.match(self._VALID_URL, url).groups()
+ return self.url_result(smuggle_url(
+ self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
+ {'force_smil_url': True}), 'ThePlatform', guid)
diff --git a/youtube_dlc/extractor/scte.py b/youtube_dlc/extractor/scte.py
new file mode 100644
index 000000000..ca1de63b6
--- /dev/null
+++ b/youtube_dlc/extractor/scte.py
@@ -0,0 +1,146 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ decode_packed_codes,
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class SCTEBaseIE(InfoExtractor):
+ _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
+ _NETRC_MACHINE = 'scte'
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_popup = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']welcome\b', r'>Sign Out<'))
+
+ # already logged in
+ if is_logged(login_popup):
+ return
+
+ login_form = self._hidden_inputs(login_popup)
+
+ login_form.update({
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
+ 'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
+ })
+
+ response = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form))
+
+ if '|pageRedirect|' not in response and not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</',
+ response, 'error message', default=None)
+ if error:
+                raise ExtractorError('Unable to log in: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+
+class SCTEIE(SCTEBaseIE):
+ _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
+ 'info_dict': {
+ 'title': 'Introduction to DOCSIS Engineering Professional',
+ 'id': '31484',
+ },
+ 'playlist_count': 5,
+ 'skip': 'Requires account credentials',
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')
+
+        context_id = self._search_regex(r'context-(\d+)', webpage, 'context id')
+ content_base = 'https://learning.scte.org/pluginfile.php/%s/mod_scorm/content/8/' % context_id
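+        # data.js is obfuscated with a JavaScript packer; unpack it to reach
+        # the CreateData("...") XML payload parsed below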
+ context = decode_packed_codes(self._download_webpage(
+ '%smobile/data.js' % content_base, video_id))
+
+ data = self._parse_xml(
+ self._search_regex(
+ r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
+ video_id)
+
+ entries = []
+ for asset in data.findall('.//asset'):
+ asset_url = asset.get('url')
+ if not asset_url or not asset_url.endswith('.mp4'):
+ continue
+ asset_id = self._search_regex(
+ r'video_([^_]+)_', asset_url, 'asset id', default=None)
+ if not asset_id:
+ continue
+ entries.append({
+ 'id': asset_id,
+ 'title': title,
+ 'url': content_base + asset_url,
+ })
+
+ return self.playlist_result(entries, video_id, title)
+
+
+class SCTECourseIE(SCTEBaseIE):
+ _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3639',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://learning.scte.org/course/view.php?id=3073',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ course_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_id)
+
+ title = self._search_regex(
+ r'<h1>(.+?)</h1>', webpage, 'title', default=None)
+
+ entries = []
+ for mobj in re.finditer(
+ r'''(?x)
+ <a[^>]+
+ href=(["\'])
+ (?P<url>
+ https?://learning\.scte\.org/mod/
+ (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
+ \bid=\d+
+ )
+ ''',
+ webpage):
+ item_url = mobj.group('url')
+ if item_url == url:
+ continue
+ ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
+ else SCTECourseIE.ie_key())
+ entries.append(self.url_result(item_url, ie=ie))
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/youtube_dlc/extractor/seeker.py b/youtube_dlc/extractor/seeker.py
new file mode 100644
index 000000000..7872dc80d
--- /dev/null
+++ b/youtube_dlc/extractor/seeker.py
@@ -0,0 +1,58 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ get_element_by_class,
+ strip_or_none,
+)
+
+
+class SeekerIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?seeker\.com/(?P<display_id>.*)-(?P<article_id>\d+)\.html'
+ _TESTS = [{
+ 'url': 'http://www.seeker.com/should-trump-be-required-to-release-his-tax-returns-1833805621.html',
+ 'md5': '897d44bbe0d8986a2ead96de565a92db',
+ 'info_dict': {
+ 'id': 'Elrn3gnY',
+ 'ext': 'mp4',
+ 'title': 'Should Trump Be Required To Release His Tax Returns?',
+ 'description': 'md5:41efa8cfa8d627841045eec7b018eb45',
+ 'timestamp': 1490090165,
+ 'upload_date': '20170321',
+ }
+ }, {
+ 'url': 'http://www.seeker.com/changes-expected-at-zoos-following-recent-gorilla-lion-shootings-1834116536.html',
+ 'playlist': [
+ {
+ 'md5': '0497b9f20495174be73ae136949707d2',
+ 'info_dict': {
+ 'id': 'FihYQ8AE',
+ 'ext': 'mp4',
+ 'title': 'The Pros & Cons Of Zoos',
+ 'description': 'md5:d88f99a8ea8e7d25e6ff77f271b1271c',
+ 'timestamp': 1490039133,
+ 'upload_date': '20170320',
+ },
+ }
+ ],
+ 'info_dict': {
+ 'id': '1834116536',
+ 'title': 'After Gorilla Killing, Changes Ahead for Zoos',
+ 'description': 'The largest association of zoos and others are hoping to learn from recent incidents that led to the shooting deaths of a gorilla and two lions.',
+ },
+ }]
+
+ def _real_extract(self, url):
+ display_id, article_id = re.match(self._VALID_URL, url).groups()
+ webpage = self._download_webpage(url, display_id)
+ entries = []
+ for jwp_id in re.findall(r'data-video-id="([a-zA-Z0-9]{8})"', webpage):
+ entries.append(self.url_result(
+ 'jwplatform:' + jwp_id, 'JWPlatform', jwp_id))
+ return self.playlist_result(
+ entries, article_id,
+ self._og_search_title(webpage),
+ strip_or_none(get_element_by_class('subtitle__text', webpage)) or self._og_search_description(webpage))
diff --git a/youtube_dl/extractor/senateisvp.py b/youtube_dlc/extractor/senateisvp.py
index db5ef8b57..db5ef8b57 100644
--- a/youtube_dl/extractor/senateisvp.py
+++ b/youtube_dlc/extractor/senateisvp.py
diff --git a/youtube_dl/extractor/sendtonews.py b/youtube_dlc/extractor/sendtonews.py
index 9d9652949..9d9652949 100644
--- a/youtube_dl/extractor/sendtonews.py
+++ b/youtube_dlc/extractor/sendtonews.py
diff --git a/youtube_dlc/extractor/servus.py b/youtube_dlc/extractor/servus.py
new file mode 100644
index 000000000..9401bf2cf
--- /dev/null
+++ b/youtube_dlc/extractor/servus.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class ServusIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
+ servustv\.com/videos
+ )
+ /(?P<id>[aA]{2}-\w+|\d+-\d+)
+ '''
+ _TESTS = [{
+ # new URL schema
+ 'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
+ 'md5': '3e1dd16775aa8d5cbef23628cfffc1f4',
+ 'info_dict': {
+ 'id': 'AA-1T6VBU5PW1W12',
+ 'ext': 'mp4',
+ 'title': 'Die Grünen aus Sicht des Volkes',
+ 'description': 'md5:1247204d85783afe3682644398ff2ec4',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ }
+ }, {
+ # old URL schema
+ 'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).upper()
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._search_regex(
+ (r'videoLabel\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'<h\d+[^>]+\bclass=["\']heading--(?:one|two)["\'][^>]*>(?P<title>[^<]+)'),
+ webpage, 'title', default=None,
+ group='title') or self._og_search_title(webpage)
+ title = re.sub(r'\s*-\s*Servus TV\s*$', '', title)
+ description = self._og_search_description(webpage)
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ formats = self._extract_m3u8_formats(
+ 'https://stv.rbmbtnx.net/api/v1/manifests/%s.m3u8' % video_id,
+ video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/sevenplus.py b/youtube_dlc/extractor/sevenplus.py
index 84568ac69..84568ac69 100644
--- a/youtube_dl/extractor/sevenplus.py
+++ b/youtube_dlc/extractor/sevenplus.py
diff --git a/youtube_dl/extractor/sexu.py b/youtube_dlc/extractor/sexu.py
index 3df51520b..3df51520b 100644
--- a/youtube_dl/extractor/sexu.py
+++ b/youtube_dlc/extractor/sexu.py
diff --git a/youtube_dl/extractor/seznamzpravy.py b/youtube_dlc/extractor/seznamzpravy.py
index 7a1c7e38b..7a1c7e38b 100644
--- a/youtube_dl/extractor/seznamzpravy.py
+++ b/youtube_dlc/extractor/seznamzpravy.py
diff --git a/youtube_dl/extractor/shahid.py b/youtube_dlc/extractor/shahid.py
index 5c2a6206b..5c2a6206b 100644
--- a/youtube_dl/extractor/shahid.py
+++ b/youtube_dlc/extractor/shahid.py
diff --git a/youtube_dlc/extractor/shared.py b/youtube_dlc/extractor/shared.py
new file mode 100644
index 000000000..02295d1a4
--- /dev/null
+++ b/youtube_dlc/extractor/shared.py
@@ -0,0 +1,140 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_b64decode,
+ compat_urllib_parse_unquote_plus,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ KNOWN_EXTENSIONS,
+ parse_filesize,
+ rot47,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class SharedBaseIE(InfoExtractor):
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ if self._FILE_NOT_FOUND in webpage:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ video_url = self._extract_video_url(webpage, video_id, url)
+
+ title = self._extract_title(webpage)
+ filesize = int_or_none(self._extract_filesize(webpage))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'filesize': filesize,
+ 'title': title,
+ }
+
+ def _extract_title(self, webpage):
+ return compat_b64decode(self._html_search_meta(
+ 'full:title', webpage, 'title')).decode('utf-8')
+
+ def _extract_filesize(self, webpage):
+ return self._html_search_meta(
+ 'full:size', webpage, 'file size', fatal=False)
+
+
+class SharedIE(SharedBaseIE):
+ IE_DESC = 'shared.sx'
+ _VALID_URL = r'https?://shared\.sx/(?P<id>[\da-z]{10})'
+ _FILE_NOT_FOUND = '>File does not exist<'
+
+ _TEST = {
+ 'url': 'http://shared.sx/0060718775',
+ 'md5': '106fefed92a8a2adb8c98e6a0652f49b',
+ 'info_dict': {
+ 'id': '0060718775',
+ 'ext': 'mp4',
+ 'title': 'Bmp4',
+ 'filesize': 1720110,
+ },
+ }
+
+ def _extract_video_url(self, webpage, video_id, url):
+ download_form = self._hidden_inputs(webpage)
+
+ video_page = self._download_webpage(
+ url, video_id, 'Downloading video page',
+ data=urlencode_postdata(download_form),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': url,
+ })
+
+ video_url = self._html_search_regex(
+ r'data-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+ video_page, 'video URL', group='url')
+
+ return video_url
+
+
+class VivoIE(SharedBaseIE):
+ IE_DESC = 'vivo.sx'
+ _VALID_URL = r'https?://vivo\.sx/(?P<id>[\da-z]{10})'
+ _FILE_NOT_FOUND = '>The file you have requested does not exists or has been removed'
+
+ _TEST = {
+ 'url': 'http://vivo.sx/d7ddda0e78',
+ 'md5': '15b3af41be0b4fe01f4df075c2678b2c',
+ 'info_dict': {
+ 'id': 'd7ddda0e78',
+ 'ext': 'mp4',
+ 'title': 'Chicken',
+ 'filesize': 515659,
+ },
+ }
+
+ def _extract_title(self, webpage):
+ title = self._html_search_regex(
+ r'data-name\s*=\s*(["\'])(?P<title>(?:(?!\1).)+)\1', webpage,
+ 'title', default=None, group='title')
+ if title:
+ ext = determine_ext(title)
+ if ext.lower() in KNOWN_EXTENSIONS:
+ title = title.rpartition('.' + ext)[0]
+ return title
+ return self._og_search_title(webpage)
+
+ def _extract_filesize(self, webpage):
+ return parse_filesize(self._search_regex(
+ r'data-type=["\']video["\'][^>]*>Watch.*?<strong>\s*\((.+?)\)',
+ webpage, 'filesize', fatal=False))
+
+ def _extract_video_url(self, webpage, video_id, url):
+ def decode_url_old(encoded_url):
+ return compat_b64decode(encoded_url).decode('utf-8')
+
+ stream_url = self._search_regex(
+ r'data-stream\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'stream url', default=None, group='url')
+ if stream_url:
+ stream_url = url_or_none(decode_url_old(stream_url))
+ if stream_url:
+ return stream_url
+
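+        # Newer pages percent-encode the stream source and obfuscate it with
+        # ROT47 instead of base64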
+ def decode_url(encoded_url):
+ return rot47(compat_urllib_parse_unquote_plus(encoded_url))
+
+ return decode_url(self._parse_json(
+ self._search_regex(
+ r'(?s)InitializeStream\s*\(\s*({.+?})\s*\)\s*;', webpage,
+ 'stream'),
+ video_id, transform_source=js_to_json)['source'])
diff --git a/youtube_dl/extractor/showroomlive.py b/youtube_dlc/extractor/showroomlive.py
index efd9d561f..efd9d561f 100644
--- a/youtube_dl/extractor/showroomlive.py
+++ b/youtube_dlc/extractor/showroomlive.py
diff --git a/youtube_dl/extractor/sina.py b/youtube_dlc/extractor/sina.py
index 07b766b4a..07b766b4a 100644
--- a/youtube_dl/extractor/sina.py
+++ b/youtube_dlc/extractor/sina.py
diff --git a/youtube_dl/extractor/sixplay.py b/youtube_dlc/extractor/sixplay.py
index 7ec66ecf3..7ec66ecf3 100644
--- a/youtube_dl/extractor/sixplay.py
+++ b/youtube_dlc/extractor/sixplay.py
diff --git a/youtube_dl/extractor/sky.py b/youtube_dlc/extractor/sky.py
index ea30d6e62..ea30d6e62 100644
--- a/youtube_dl/extractor/sky.py
+++ b/youtube_dlc/extractor/sky.py
diff --git a/youtube_dl/extractor/skylinewebcams.py b/youtube_dlc/extractor/skylinewebcams.py
index b7f8ac736..b7f8ac736 100644
--- a/youtube_dl/extractor/skylinewebcams.py
+++ b/youtube_dlc/extractor/skylinewebcams.py
diff --git a/youtube_dl/extractor/skynewsarabia.py b/youtube_dlc/extractor/skynewsarabia.py
index fffc9aa22..fffc9aa22 100644
--- a/youtube_dl/extractor/skynewsarabia.py
+++ b/youtube_dlc/extractor/skynewsarabia.py
diff --git a/youtube_dl/extractor/slideshare.py b/youtube_dlc/extractor/slideshare.py
index e89ebebe7..e89ebebe7 100644
--- a/youtube_dl/extractor/slideshare.py
+++ b/youtube_dlc/extractor/slideshare.py
diff --git a/youtube_dlc/extractor/slideslive.py b/youtube_dlc/extractor/slideslive.py
new file mode 100644
index 000000000..d9ea76831
--- /dev/null
+++ b/youtube_dlc/extractor/slideslive.py
@@ -0,0 +1,61 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import smuggle_url
+
+
+class SlidesLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://slideslive\.com/(?P<id>[0-9]+)'
+ _TESTS = [{
+ # video_service_name = YOUTUBE
+ 'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
+ 'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
+ 'info_dict': {
+ 'id': 'LMtgR8ba0b0',
+ 'ext': 'mp4',
+ 'title': 'GCC IA16 backend',
+ 'description': 'Watch full version of this video at https://slideslive.com/38902413.',
+ 'uploader': 'SlidesLive Videos - A',
+ 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
+ 'upload_date': '20170925',
+ }
+ }, {
+ # video_service_name = youtube
+ 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
+ 'only_matching': True,
+ }, {
+ # video_service_name = url
+ 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
+ 'only_matching': True,
+ }, {
+ # video_service_name = vimeo
+ 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'https://ben.slideslive.com/player/' + video_id, video_id)
+ service_name = video_data['video_service_name'].lower()
+ assert service_name in ('url', 'vimeo', 'youtube')
+ service_id = video_data['video_service_id']
+ info = {
+ 'id': video_id,
+ 'thumbnail': video_data.get('thumbnail'),
+ 'url': service_id,
+ }
+ if service_name == 'url':
+ info['title'] = video_data['title']
+ else:
+ info.update({
+ '_type': 'url_transparent',
+ 'ie_key': service_name.capitalize(),
+ 'title': video_data.get('title'),
+ })
+ if service_name == 'vimeo':
+ info['url'] = smuggle_url(
+ 'https://player.vimeo.com/video/' + service_id,
+ {'http_headers': {'Referer': url}})
+ return info
diff --git a/youtube_dl/extractor/slutload.py b/youtube_dlc/extractor/slutload.py
index 661f9e59d..661f9e59d 100644
--- a/youtube_dl/extractor/slutload.py
+++ b/youtube_dlc/extractor/slutload.py
diff --git a/youtube_dl/extractor/smotri.py b/youtube_dlc/extractor/smotri.py
index 45995f30f..45995f30f 100644
--- a/youtube_dl/extractor/smotri.py
+++ b/youtube_dlc/extractor/smotri.py
diff --git a/youtube_dl/extractor/snotr.py b/youtube_dlc/extractor/snotr.py
index f77354748..f77354748 100644
--- a/youtube_dl/extractor/snotr.py
+++ b/youtube_dlc/extractor/snotr.py
diff --git a/youtube_dlc/extractor/sohu.py b/youtube_dlc/extractor/sohu.py
new file mode 100644
index 000000000..76b3cc6b6
--- /dev/null
+++ b/youtube_dlc/extractor/sohu.py
@@ -0,0 +1,205 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ try_get,
+)
+
+
+class SohuIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?'
+
+ # Sohu videos give different MD5 sums on Travis CI and my machine
+ _TESTS = [{
+ 'note': 'This video is available only in Mainland China',
+ 'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super',
+ 'info_dict': {
+ 'id': '382479172',
+ 'ext': 'mp4',
+ 'title': 'MV:Far East Movement《The Illest》',
+ },
+        'skip': 'Only available in China',
+ }, {
+ 'url': 'http://tv.sohu.com/20150305/n409385080.shtml',
+ 'info_dict': {
+ 'id': '409385080',
+ 'ext': 'mp4',
+ 'title': '《2015湖南卫视羊年元宵晚会》唐嫣《花好月圆》',
+ }
+ }, {
+ 'url': 'http://my.tv.sohu.com/us/232799889/78693464.shtml',
+ 'info_dict': {
+ 'id': '78693464',
+ 'ext': 'mp4',
+ 'title': '【爱范品】第31期:MWC见不到的奇葩手机',
+ }
+ }, {
+ 'note': 'Multipart video',
+ 'url': 'http://my.tv.sohu.com/pl/8384802/78910339.shtml',
+ 'info_dict': {
+ 'id': '78910339',
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '78910339_part1',
+ 'ext': 'mp4',
+ 'duration': 294,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '78910339_part2',
+ 'ext': 'mp4',
+ 'duration': 300,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }, {
+ 'info_dict': {
+ 'id': '78910339_part3',
+ 'ext': 'mp4',
+ 'duration': 150,
+ 'title': '【神探苍实战秘籍】第13期 战争之影 赫卡里姆',
+ }
+ }]
+ }, {
+ 'note': 'Video with title containing dash',
+ 'url': 'http://my.tv.sohu.com/us/249884221/78932792.shtml',
+ 'info_dict': {
+ 'id': '78932792',
+ 'ext': 'mp4',
+ 'title': 'youtube-dlc testing video',
+ },
+ 'params': {
+ 'skip_download': True
+ }
+ }]
+
+ def _real_extract(self, url):
+
+ def _fetch_data(vid_id, mytv=False):
+ if mytv:
+ base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid='
+ else:
+ base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid='
+
+ return self._download_json(
+ base_data_url + vid_id, video_id,
+ 'Downloading JSON data for %s' % vid_id,
+ headers=self.geo_verification_headers())
+
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ mytv = mobj.group('mytv') is not None
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = re.sub(r' - 搜狐视频$', '', self._og_search_title(webpage))
+
+ vid = self._html_search_regex(
+ r'var vid ?= ?["\'](\d+)["\']',
+ webpage, 'video path')
+ vid_data = _fetch_data(vid, mytv)
+ if vid_data['play'] != 1:
+ if vid_data.get('status') == 12:
+ raise ExtractorError(
+                    '%s said: There\'s something wrong with the video.' % self.IE_NAME,
+ expected=True)
+ else:
+ self.raise_geo_restricted(
+ '%s said: The video is only licensed to users in Mainland China.' % self.IE_NAME)
+
+ formats_json = {}
+ for format_id in ('nor', 'high', 'super', 'ori', 'h2644k', 'h2654k'):
+ vid_id = vid_data['data'].get('%sVid' % format_id)
+ if not vid_id:
+ continue
+ vid_id = compat_str(vid_id)
+ formats_json[format_id] = vid_data if vid == vid_id else _fetch_data(vid_id, mytv)
+
+ part_count = vid_data['data']['totalBlocks']
+
+ playlist = []
+ for i in range(part_count):
+ formats = []
+ for format_id, format_data in formats_json.items():
+ allot = format_data['allot']
+
+ data = format_data['data']
+ clips_url = data['clipsURL']
+ su = data['su']
+
+ video_url = 'newflv.sohu.ccgslb.net'
+ cdnId = None
+ retries = 0
+
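+                # The CDN keeps returning the placeholder host until a node is
+                # assigned; retry, echoing the returned node id ('nid') back
+                # via the 'idc' parameter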
+ while 'newflv.sohu.ccgslb.net' in video_url:
+ params = {
+ 'prot': 9,
+ 'file': clips_url[i],
+ 'new': su[i],
+ 'prod': 'flash',
+ 'rb': 1,
+ }
+
+ if cdnId is not None:
+ params['idc'] = cdnId
+
+ download_note = 'Downloading %s video URL part %d of %d' % (
+ format_id, i + 1, part_count)
+
+ if retries > 0:
+ download_note += ' (retry #%d)' % retries
+ part_info = self._parse_json(self._download_webpage(
+ 'http://%s/?%s' % (allot, compat_urllib_parse_urlencode(params)),
+ video_id, download_note), video_id)
+
+ video_url = part_info['url']
+ cdnId = part_info.get('nid')
+
+ retries += 1
+ if retries > 5:
+ raise ExtractorError('Failed to get video URL')
+
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'filesize': int_or_none(
+ try_get(data, lambda x: x['clipsBytes'][i])),
+ 'width': int_or_none(data.get('width')),
+ 'height': int_or_none(data.get('height')),
+ 'fps': int_or_none(data.get('fps')),
+ })
+ self._sort_formats(formats)
+
+ playlist.append({
+ 'id': '%s_part%d' % (video_id, i + 1),
+ 'title': title,
+ 'duration': vid_data['data']['clipsDuration'][i],
+ 'formats': formats,
+ })
+
+ if len(playlist) == 1:
+ info = playlist[0]
+ info['id'] = video_id
+ else:
+ info = {
+ '_type': 'multi_video',
+ 'entries': playlist,
+ 'id': video_id,
+ 'title': title,
+ }
+
+ return info
diff --git a/youtube_dl/extractor/sonyliv.py b/youtube_dlc/extractor/sonyliv.py
index 58a8c0d4d..58a8c0d4d 100644
--- a/youtube_dl/extractor/sonyliv.py
+++ b/youtube_dlc/extractor/sonyliv.py
diff --git a/youtube_dlc/extractor/soundcloud.py b/youtube_dlc/extractor/soundcloud.py
new file mode 100644
index 000000000..0fe084f5c
--- /dev/null
+++ b/youtube_dlc/extractor/soundcloud.py
@@ -0,0 +1,905 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+import json
+import random
+
+from .common import (
+ InfoExtractor,
+ SearchInfoExtractor
+)
+from ..compat import (
+ compat_HTTPError,
+ compat_kwargs,
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ error_to_compat_str,
+ ExtractorError,
+ float_or_none,
+ HEADRequest,
+ int_or_none,
+ KNOWN_EXTENSIONS,
+ mimetype2ext,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urlhandle_detect_ext,
+ sanitized_Request,
+)
+
+
+class SoundcloudEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:w|player|p)\.soundcloud\.com/player/?.*?\burl=(?P<id>.+)'
+ _TEST = {
+ # from https://www.soundi.fi/uutiset/ennakkokuuntelussa-timo-kaukolammen-station-to-station-to-station-julkaisua-juhlitaan-tanaan-g-livelabissa/
+ 'url': 'https://w.soundcloud.com/player/?visual=true&url=https%3A%2F%2Fapi.soundcloud.com%2Fplaylists%2F922213810&show_artwork=true&maxwidth=640&maxheight=960&dnt=1&secret_token=s-ziYey',
+ 'only_matching': True,
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [m.group('url') for m in re.finditer(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ query = compat_urlparse.parse_qs(
+ compat_urlparse.urlparse(url).query)
+ api_url = query['url'][0]
+ secret_token = query.get('secret_token')
+ if secret_token:
+ api_url = update_url_query(api_url, {'secret_token': secret_token[0]})
+ return self.url_result(api_url)
+
+
+class SoundcloudIE(InfoExtractor):
+ """Information extractor for soundcloud.com
+ To access the media, the uid of the song and a stream token
+ must be extracted from the page source and the script must make
+ a request to media.soundcloud.com/crossdomain.xml. Then
+       the media can be grabbed by requesting from a URL composed
+       of the stream token and uid.
+ """
+
+ _VALID_URL = r'''(?x)^(?:https?://)?
+ (?:(?:(?:www\.|m\.)?soundcloud\.com/
+ (?!stations/track)
+ (?P<uploader>[\w\d-]+)/
+ (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
+ (?P<title>[\w\d-]+)/?
+ (?P<token>[^?]+?)?(?:[?].*)?$)
+ |(?:api(?:-v2)?\.soundcloud\.com/tracks/(?P<track_id>\d+)
+ (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
+ )
+ '''
+ IE_NAME = 'soundcloud'
+ _TESTS = [
+ {
+ 'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
+ 'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
+ 'info_dict': {
+ 'id': '62986583',
+ 'ext': 'mp3',
+ 'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
+ 'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
+ 'uploader': 'E.T. ExTerrestrial Music',
+ 'uploader_id': '1571244',
+ 'timestamp': 1349920598,
+ 'upload_date': '20121011',
+ 'duration': 143.216,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ }
+ },
+ # geo-restricted
+ {
+ 'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
+ 'info_dict': {
+ 'id': '47127627',
+ 'ext': 'mp3',
+ 'title': 'Goldrushed',
+ 'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
+ 'uploader': 'The Royal Concept',
+ 'uploader_id': '9615865',
+ 'timestamp': 1337635207,
+ 'upload_date': '20120521',
+ 'duration': 227.155,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # private link
+ {
+ 'url': 'https://soundcloud.com/jaimemf/youtube-dlc-test-video-a-y-baw/s-8Pjrp',
+ 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
+ 'info_dict': {
+ 'id': '123998367',
+ 'ext': 'mp3',
+ 'title': 'Youtube - Dl Test Video \'\' Ä↭',
+ 'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
+ 'timestamp': 1386604920,
+ 'upload_date': '20131209',
+ 'duration': 9.927,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # private link (alt format)
+ {
+ 'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
+ 'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
+ 'info_dict': {
+ 'id': '123998367',
+ 'ext': 'mp3',
+ 'title': 'Youtube - Dl Test Video \'\' Ä↭',
+ 'description': 'test chars: \"\'/\\ä↭',
+ 'uploader': 'jaimeMF',
+ 'uploader_id': '69767071',
+ 'timestamp': 1386604920,
+ 'upload_date': '20131209',
+ 'duration': 9.927,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # downloadable song
+ {
+ 'url': 'https://soundcloud.com/oddsamples/bus-brakes',
+ 'md5': '7624f2351f8a3b2e7cd51522496e7631',
+ 'info_dict': {
+ 'id': '128590877',
+ 'ext': 'mp3',
+ 'title': 'Bus Brakes',
+ 'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
+ 'uploader': 'oddsamples',
+ 'uploader_id': '73680509',
+ 'timestamp': 1389232924,
+ 'upload_date': '20140109',
+ 'duration': 17.346,
+ 'license': 'cc-by-sa',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # private link, downloadable format
+ {
+ 'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
+ 'md5': '64a60b16e617d41d0bef032b7f55441e',
+ 'info_dict': {
+ 'id': '340344461',
+ 'ext': 'wav',
+ 'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
+ 'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
+ 'uploader': 'Ori Uplift Music',
+ 'uploader_id': '12563093',
+ 'timestamp': 1504206263,
+ 'upload_date': '20170831',
+ 'duration': 7449.096,
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ # no album art, use avatar pic for thumbnail
+ {
+ 'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
+ 'md5': '59c7872bc44e5d99b7211891664760c2',
+ 'info_dict': {
+ 'id': '309699954',
+ 'ext': 'mp3',
+ 'title': 'Sideways (Prod. Mad Real)',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'uploader': 'garyvee',
+ 'uploader_id': '2366352',
+ 'timestamp': 1488152409,
+ 'upload_date': '20170226',
+ 'duration': 207.012,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://soundcloud.com/giovannisarani/mezzo-valzer',
+ 'md5': 'e22aecd2bc88e0e4e432d7dcc0a1abf7',
+ 'info_dict': {
+ 'id': '583011102',
+ 'ext': 'mp3',
+ 'title': 'Mezzo Valzer',
+ 'description': 'md5:4138d582f81866a530317bae316e8b61',
+ 'uploader': 'Micronie',
+ 'uploader_id': '3352531',
+ 'timestamp': 1551394171,
+ 'upload_date': '20190228',
+ 'duration': 180.157,
+ 'thumbnail': r're:https?://.*\.jpg',
+ 'license': 'all-rights-reserved',
+ 'view_count': int,
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ },
+ {
+ # with AAC HQ format available via OAuth token
+ 'url': 'https://soundcloud.com/wandw/the-chainsmokers-ft-daya-dont-let-me-down-ww-remix-1',
+ 'only_matching': True,
+ },
+ ]
+
+ _API_V2_BASE = 'https://api-v2.soundcloud.com/'
+ _BASE_URL = 'https://soundcloud.com/'
+ _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg'
+
+ _ARTWORK_MAP = {
+ 'mini': 16,
+ 'tiny': 20,
+ 'small': 32,
+ 'badge': 47,
+ 't67x67': 67,
+ 'large': 100,
+ 't300x300': 300,
+ 'crop': 400,
+ 't500x500': 500,
+ 'original': 0,
+ }
+
+ def _store_client_id(self, client_id):
+ self._downloader.cache.store('soundcloud', 'client_id', client_id)
+
+ def _update_client_id(self):
+ webpage = self._download_webpage('https://soundcloud.com/', None)
+ for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)):
+ script = self._download_webpage(src, None, fatal=False)
+ if script:
+ client_id = self._search_regex(
+ r'client_id\s*:\s*"([0-9a-zA-Z]{32})"',
+ script, 'client id', default=None)
+ if client_id:
+ self._CLIENT_ID = client_id
+ self._store_client_id(client_id)
+ return
+ raise ExtractorError('Unable to extract client id')
+
+ def _download_json(self, *args, **kwargs):
+ non_fatal = kwargs.get('fatal') is False
+ if non_fatal:
+ del kwargs['fatal']
+ query = kwargs.get('query', {}).copy()
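+        # Retry once with a freshly scraped client_id if the cached one is
+        # rejected with HTTP 401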
+ for _ in range(2):
+ query['client_id'] = self._CLIENT_ID
+ kwargs['query'] = query
+ try:
+ return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs))
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ self._store_client_id(None)
+ self._update_client_id()
+ continue
+ elif non_fatal:
+ self._downloader.report_warning(error_to_compat_str(e))
+ return False
+ raise
+
+ def _real_initialize(self):
+ self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or "T5R4kgWS2PRf6lzLyIravUMnKlbIxQag" # 'EXLwg5lHTO2dslU5EePe3xkw0m1h86Cd' # 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk'
+ self._login()
+
+ _USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
+ _API_AUTH_QUERY_TEMPLATE = '?client_id=%s'
+ _API_AUTH_URL_PW = 'https://api-auth.soundcloud.com/web-auth/sign-in/password%s'
+ _access_token = None
+ _HEADERS = {}
+ _NETRC_MACHINE = 'soundcloud'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ def genDevId():
+            def genNumBlock():
+                return ''.join([str(random.randrange(10)) for _ in range(6)])
+            return '-'.join([genNumBlock() for _ in range(4)])
+
+ payload = {
+ 'client_id': self._CLIENT_ID,
+ 'recaptcha_pubkey': 'null',
+ 'recaptcha_response': 'null',
+ 'credentials': {
+ 'identifier': username,
+ 'password': password
+ },
+ 'signature': self.sign(username, password, self._CLIENT_ID),
+ 'device_id': genDevId(),
+ 'user_agent': self._USER_AGENT
+ }
+
+ query = self._API_AUTH_QUERY_TEMPLATE % self._CLIENT_ID
+ login = sanitized_Request(self._API_AUTH_URL_PW % query, json.dumps(payload).encode('utf-8'))
+ response = self._download_json(login, None)
+ self._access_token = response.get('session').get('access_token')
+ if not self._access_token:
+            self.report_warning('Unable to get access token, login may have failed')
+ else:
+ self._HEADERS = {'Authorization': 'OAuth ' + self._access_token}
+
+ # signature generation
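+    # Reimplements the request-signing routine from SoundCloud's web client;
+    # the single-letter names mirror the minified JavaScript source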
+ def sign(self, user, pw, clid):
+ a = 33
+ i = 1
+ s = 440123
+ w = 117
+ u = 1800000
+ l = 1042
+ b = 37
+ k = 37
+ c = 5
+ n = "0763ed7314c69015fd4a0dc16bbf4b90" # _KEY
+ y = "8" # _REV
+ r = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36" # _USER_AGENT
+ e = user # _USERNAME
+ t = clid # _CLIENT_ID
+
+ d = '-'.join([str(mInt) for mInt in [a, i, s, w, u, l, b, k]])
+ p = n + y + d + r + e + t + d + n
+ h = p
+
+ m = 8011470
+ f = 0
+
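+        # 24-bit rolling hash: rotate right by one bit, add the character
+        # code, then mask back down to 24 bits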
+ for f in range(f, len(h)):
+ m = (m >> 1) + ((1 & m) << 23)
+ m += ord(h[f])
+ m &= 16777215
+
+ # c is not even needed
+ out = str(y) + ':' + str(d) + ':' + format(m, 'x') + ':' + str(c)
+
+ return out
+
+ @classmethod
+ def _resolv_url(cls, url):
+ return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url
+
+ def _extract_info_dict(self, info, full_title=None, secret_token=None):
+ track_id = compat_str(info['id'])
+ title = info['title']
+
+ format_urls = set()
+ formats = []
+ query = {'client_id': self._CLIENT_ID}
+ if secret_token:
+ query['secret_token'] = secret_token
+
+ if info.get('downloadable') and info.get('has_downloads_left'):
+ download_url = update_url_query(
+ self._API_V2_BASE + 'tracks/' + track_id + '/download', query)
+ redirect_url = (self._download_json(download_url, track_id, fatal=False) or {}).get('redirectUri')
+ if redirect_url:
+ urlh = self._request_webpage(
+ HEADRequest(redirect_url), track_id, fatal=False)
+ if urlh:
+ format_url = urlh.geturl()
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': 'download',
+ 'ext': urlhandle_detect_ext(urlh) or 'mp3',
+ 'filesize': int_or_none(urlh.headers.get('Content-Length')),
+ 'url': format_url,
+ 'preference': 10,
+ })
+
+ def invalid_url(url):
+ return not url or url in format_urls
+
+ def add_format(f, protocol, is_preview=False):
+ mobj = re.search(r'\.(?P<abr>\d+)\.(?P<ext>[0-9a-z]{3,4})(?=[/?])', stream_url)
+ if mobj:
+ for k, v in mobj.groupdict().items():
+ if not f.get(k):
+ f[k] = v
+ format_id_list = []
+ if protocol:
+ format_id_list.append(protocol)
+ ext = f.get('ext')
+ if ext == 'aac':
+ f['abr'] = '256'
+ for k in ('ext', 'abr'):
+ v = f.get(k)
+ if v:
+ format_id_list.append(v)
+ preview = is_preview or re.search(r'/(?:preview|playlist)/0/30/', f['url'])
+ if preview:
+ format_id_list.append('preview')
+ abr = f.get('abr')
+ if abr:
+ f['abr'] = int(abr)
+ if protocol == 'hls':
+ protocol = 'm3u8' if ext == 'aac' else 'm3u8_native'
+ else:
+ protocol = 'http'
+ f.update({
+ 'format_id': '_'.join(format_id_list),
+ 'protocol': protocol,
+ 'preference': -10 if preview else None,
+ })
+ formats.append(f)
+
+ # New API
+ transcodings = try_get(
+ info, lambda x: x['media']['transcodings'], list) or []
+ for t in transcodings:
+ if not isinstance(t, dict):
+ continue
+ format_url = url_or_none(t.get('url'))
+ if not format_url:
+ continue
+ stream = self._download_json(
+ format_url, track_id, query=query, fatal=False, headers=self._HEADERS)
+ if not isinstance(stream, dict):
+ continue
+ stream_url = url_or_none(stream.get('url'))
+ if invalid_url(stream_url):
+ continue
+ format_urls.add(stream_url)
+ stream_format = t.get('format') or {}
+ protocol = stream_format.get('protocol')
+ if protocol != 'hls' and '/hls' in format_url:
+ protocol = 'hls'
+ ext = None
+ preset = str_or_none(t.get('preset'))
+ if preset:
+ ext = preset.split('_')[0]
+ if ext not in KNOWN_EXTENSIONS:
+ ext = mimetype2ext(stream_format.get('mime_type'))
+ add_format({
+ 'url': stream_url,
+ 'ext': ext,
+ }, 'http' if protocol == 'progressive' else protocol,
+ t.get('snipped') or '/preview/' in format_url)
+
+ for f in formats:
+ f['vcodec'] = 'none'
+
+ if not formats and info.get('policy') == 'BLOCK':
+ self.raise_geo_restricted()
+ self._sort_formats(formats)
+
+ user = info.get('user') or {}
+
+ thumbnails = []
+ artwork_url = info.get('artwork_url')
+ thumbnail = artwork_url or user.get('avatar_url')
+ if isinstance(thumbnail, compat_str):
+ if re.search(self._IMAGE_REPL_RE, thumbnail):
+ for image_id, size in self._ARTWORK_MAP.items():
+ i = {
+ 'id': image_id,
+ 'url': re.sub(self._IMAGE_REPL_RE, '-%s.jpg' % image_id, thumbnail),
+ }
+ if image_id == 'tiny' and not artwork_url:
+ size = 18
+ elif image_id == 'original':
+ i['preference'] = 10
+ if size:
+ i.update({
+ 'width': size,
+ 'height': size,
+ })
+ thumbnails.append(i)
+ else:
+ thumbnails = [{'url': thumbnail}]
+
+ def extract_count(key):
+ return int_or_none(info.get('%s_count' % key))
+
+ return {
+ 'id': track_id,
+ 'uploader': user.get('username'),
+ 'uploader_id': str_or_none(user.get('id')) or user.get('permalink'),
+ 'uploader_url': user.get('permalink_url'),
+ 'timestamp': unified_timestamp(info.get('created_at')),
+ 'title': title,
+ 'description': info.get('description'),
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(info.get('duration'), 1000),
+ 'webpage_url': info.get('permalink_url'),
+ 'license': info.get('license'),
+ 'view_count': extract_count('playback'),
+ 'like_count': extract_count('favoritings') or extract_count('likes'),
+ 'comment_count': extract_count('comment'),
+ 'repost_count': extract_count('reposts'),
+ 'genre': info.get('genre'),
+ 'formats': formats
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ track_id = mobj.group('track_id')
+
+ query = {}
+ if track_id:
+ info_json_url = self._API_V2_BASE + 'tracks/' + track_id
+ full_title = track_id
+ token = mobj.group('secret_token')
+ if token:
+ query['secret_token'] = token
+ else:
+ full_title = resolve_title = '%s/%s' % mobj.group('uploader', 'title')
+ token = mobj.group('token')
+ if token:
+ resolve_title += '/%s' % token
+ info_json_url = self._resolv_url(self._BASE_URL + resolve_title)
+
+ info = self._download_json(
+ info_json_url, full_title, 'Downloading info JSON', query=query, headers=self._HEADERS)
+
+ return self._extract_info_dict(info, full_title, token)
+
+
+class SoundcloudPlaylistBaseIE(SoundcloudIE):
+ def _extract_set(self, playlist, token=None):
+ playlist_id = compat_str(playlist['id'])
+ tracks = playlist.get('tracks') or []
+ if not all([t.get('permalink_url') for t in tracks]) and token:
+ tracks = self._download_json(
+ self._API_V2_BASE + 'tracks', playlist_id,
+ 'Downloading tracks', query={
+ 'ids': ','.join([compat_str(t['id']) for t in tracks]),
+ 'playlistId': playlist_id,
+ 'playlistSecretToken': token,
+ }, headers=self._HEADERS)
+ entries = []
+ for track in tracks:
+ track_id = str_or_none(track.get('id'))
+ url = track.get('permalink_url')
+ if not url:
+ if not track_id:
+ continue
+ url = self._API_V2_BASE + 'tracks/' + track_id
+ if token:
+ url += '?secret_token=' + token
+ entries.append(self.url_result(
+ url, SoundcloudIE.ie_key(), track_id))
+ return self.playlist_result(
+ entries, playlist_id,
+ playlist.get('title'),
+ playlist.get('description'))
+
+
+class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[:\w\d-]+)(?:/(?P<token>[^?/]+))?'
+ IE_NAME = 'soundcloud:set'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
+ 'info_dict': {
+ 'id': '2284613',
+ 'title': 'The Royal Concept EP',
+ 'description': 'md5:71d07087c7a449e8941a70a29e34671e',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://soundcloud.com/discover/sets/weekly::flacmatic',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://soundcloud.com/discover/sets/charts-top:all-music:de',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://soundcloud.com/discover/sets/charts-top:hiphoprap:kr',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+
+ full_title = '%s/sets/%s' % mobj.group('uploader', 'slug_title')
+ token = mobj.group('token')
+ if token:
+ full_title += '/' + token
+
+ info = self._download_json(self._resolv_url(
+ self._BASE_URL + full_title), full_title, headers=self._HEADERS)
+
+ if 'errors' in info:
+ msgs = (compat_str(err['error_message']) for err in info['errors'])
+ raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))
+
+ return self._extract_set(info, token)
+
+
+class SoundcloudPagedPlaylistBaseIE(SoundcloudIE):
+ def _extract_playlist(self, base_url, playlist_id, playlist_title):
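+ # linked_partitioning=1 asks the API to paginate and return a next_href cursor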
+ COMMON_QUERY = {
+ 'limit': 80000,
+ 'linked_partitioning': '1',
+ }
+
+ query = COMMON_QUERY.copy()
+ query['offset'] = 0
+
+ next_href = base_url
+
+ entries = []
+ for i in itertools.count():
+ response = self._download_json(
+ next_href, playlist_id,
+ 'Downloading track page %s' % (i + 1), query=query, headers=self._HEADERS)
+
+ collection = response['collection']
+
+ if not isinstance(collection, list):
+ collection = []
+
+ # An empty collection may be returned; in that case we proceed
+ # straight to next_href
+
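+ # A stream entry can be a track/playlist itself or wrap one under 'track'/'playlist'; take the first candidate with a usable permalink_url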
+ def resolve_entry(candidates):
+ for cand in candidates:
+ if not isinstance(cand, dict):
+ continue
+ permalink_url = url_or_none(cand.get('permalink_url'))
+ if not permalink_url:
+ continue
+ return self.url_result(
+ permalink_url,
+ SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
+ str_or_none(cand.get('id')), cand.get('title'))
+
+ for e in collection:
+ entry = resolve_entry((e, e.get('track'), e.get('playlist')))
+ if entry:
+ entries.append(entry)
+
+ next_href = response.get('next_href')
+ if not next_href:
+ break
+
+ parsed_next_href = compat_urlparse.urlparse(next_href)
+ query = compat_urlparse.parse_qs(parsed_next_href.query)
+ query.update(COMMON_QUERY)
+
+ return {
+ '_type': 'playlist',
+ 'id': playlist_id,
+ 'title': playlist_title,
+ 'entries': entries,
+ }
+
+
+class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:(?:www|m)\.)?soundcloud\.com/
+ (?P<user>[^/]+)
+ (?:/
+ (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
+ )?
+ /?(?:[?#].*)?$
+ '''
+ IE_NAME = 'soundcloud:user'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/soft-cell-official',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (All)',
+ },
+ 'playlist_mincount': 28,
+ }, {
+ 'url': 'https://soundcloud.com/soft-cell-official/tracks',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (Tracks)',
+ },
+ 'playlist_mincount': 27,
+ }, {
+ 'url': 'https://soundcloud.com/soft-cell-official/albums',
+ 'info_dict': {
+ 'id': '207965082',
+ 'title': 'Soft Cell (Albums)',
+ },
+ 'playlist_mincount': 1,
+ }, {
+ 'url': 'https://soundcloud.com/jcv246/sets',
+ 'info_dict': {
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Sets)',
+ },
+ 'playlist_mincount': 2,
+ }, {
+ 'url': 'https://soundcloud.com/jcv246/reposts',
+ 'info_dict': {
+ 'id': '12982173',
+ 'title': 'Jordi / cv (Reposts)',
+ },
+ 'playlist_mincount': 6,
+ }, {
+ 'url': 'https://soundcloud.com/clalberg/likes',
+ 'info_dict': {
+ 'id': '11817582',
+ 'title': 'clalberg (Likes)',
+ },
+ 'playlist_mincount': 5,
+ }, {
+ 'url': 'https://soundcloud.com/grynpyret/spotlight',
+ 'info_dict': {
+ 'id': '7098329',
+ 'title': 'Grynpyret (Spotlight)',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ _BASE_URL_MAP = {
+ 'all': 'stream/users/%s',
+ 'tracks': 'users/%s/tracks',
+ 'albums': 'users/%s/albums',
+ 'sets': 'users/%s/playlists',
+ 'reposts': 'stream/users/%s/reposts',
+ 'likes': 'users/%s/likes',
+ 'spotlight': 'users/%s/spotlight',
+ }
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ uploader = mobj.group('user')
+
+ user = self._download_json(
+ self._resolv_url(self._BASE_URL + uploader),
+ uploader, 'Downloading user info', headers=self._HEADERS)
+
+ resource = mobj.group('rsrc') or 'all'
+
+ return self._extract_playlist(
+ self._API_V2_BASE + self._BASE_URL_MAP[resource] % user['id'],
+ str_or_none(user.get('id')),
+ '%s (%s)' % (user['username'], resource.capitalize()))
+
+
+class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
+ IE_NAME = 'soundcloud:trackstation'
+ _TESTS = [{
+ 'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
+ 'info_dict': {
+ 'id': '286017854',
+ 'title': 'Track station: your text',
+ },
+ 'playlist_mincount': 47,
+ }]
+
+ def _real_extract(self, url):
+ track_name = self._match_id(url)
+
+ track = self._download_json(self._resolv_url(url), track_name, headers=self._HEADERS)
+ track_id = self._search_regex(
+ r'soundcloud:track-stations:(\d+)', track['id'], 'track id')
+
+ return self._extract_playlist(
+ self._API_V2_BASE + 'stations/%s/tracks' % track['id'],
+ track_id, 'Track station: %s' % track['title'])
+
+
+class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
+ _VALID_URL = r'https?://api(?:-v2)?\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
+ IE_NAME = 'soundcloud:playlist'
+ _TESTS = [{
+ 'url': 'https://api.soundcloud.com/playlists/4110309',
+ 'info_dict': {
+ 'id': '4110309',
+ 'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
+ 'description': 're:.*?TILT Brass - Bowery Poetry Club',
+ },
+ 'playlist_count': 6,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ playlist_id = mobj.group('id')
+
+ query = {}
+ token = mobj.group('token')
+ if token:
+ query['secret_token'] = token
+
+ data = self._download_json(
+ self._API_V2_BASE + 'playlists/' + playlist_id,
+ playlist_id, 'Downloading playlist', query=query, headers=self._HEADERS)
+
+ return self._extract_set(data, token)
+
+
+class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
+ IE_NAME = 'soundcloud:search'
+ IE_DESC = 'Soundcloud search'
+ _MAX_RESULTS = float('inf')
+ _TESTS = [{
+ 'url': 'scsearch15:post-avant jazzcore',
+ 'info_dict': {
+ 'title': 'post-avant jazzcore',
+ },
+ 'playlist_count': 15,
+ }]
+
+ _SEARCH_KEY = 'scsearch'
+ _MAX_RESULTS_PER_PAGE = 200
+ _DEFAULT_RESULTS_PER_PAGE = 50
+
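+ # Page through the search API, yielding results until 'limit' is reached or no next_href remains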
+ def _get_collection(self, endpoint, collection_id, **query):
+ limit = min(
+ query.get('limit', self._DEFAULT_RESULTS_PER_PAGE),
+ self._MAX_RESULTS_PER_PAGE)
+ query.update({
+ 'limit': limit,
+ 'linked_partitioning': 1,
+ 'offset': 0,
+ })
+ next_url = update_url_query(self._API_V2_BASE + endpoint, query)
+
+ collected_results = 0
+
+ for i in itertools.count(1):
+ response = self._download_json(
+ next_url, collection_id, 'Downloading page {0}'.format(i),
+ 'Unable to download API page', headers=self._HEADERS)
+
+ collection = response.get('collection', [])
+ if not collection:
+ break
+
+ collection = list(filter(bool, collection))
+ collected_results += len(collection)
+
+ for item in collection:
+ yield self.url_result(item['uri'], SoundcloudIE.ie_key())
+
+ if not collection or collected_results >= limit:
+ break
+
+ next_url = response.get('next_href')
+ if not next_url:
+ break
+
+ def _get_n_results(self, query, n):
+ tracks = self._get_collection('search/tracks', query, limit=n, q=query)
+ return self.playlist_result(tracks, playlist_title=query)
diff --git a/youtube_dl/extractor/soundgasm.py b/youtube_dlc/extractor/soundgasm.py
index 3d78a9d76..3d78a9d76 100644
--- a/youtube_dl/extractor/soundgasm.py
+++ b/youtube_dlc/extractor/soundgasm.py
diff --git a/youtube_dl/extractor/southpark.py b/youtube_dlc/extractor/southpark.py
index da75a43a7..da75a43a7 100644
--- a/youtube_dl/extractor/southpark.py
+++ b/youtube_dlc/extractor/southpark.py
diff --git a/youtube_dlc/extractor/spankbang.py b/youtube_dlc/extractor/spankbang.py
new file mode 100644
index 000000000..61ca902ce
--- /dev/null
+++ b/youtube_dlc/extractor/spankbang.py
@@ -0,0 +1,184 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ merge_dicts,
+ orderedSet,
+ parse_duration,
+ parse_resolution,
+ str_to_int,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class SpankBangIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
+ _TESTS = [{
+ 'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
+ 'md5': '1cc433e1d6aa14bc376535b8679302f7',
+ 'info_dict': {
+ 'id': '3vvn',
+ 'ext': 'mp4',
+ 'title': 'fantasy solo',
+ 'description': 'dillion harper masturbates on a bed',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'silly2587',
+ 'timestamp': 1422571989,
+ 'upload_date': '20150129',
+ 'age_limit': 18,
+ }
+ }, {
+ # 480p only
+ 'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
+ 'only_matching': True,
+ }, {
+ # no uploader
+ 'url': 'http://spankbang.com/lklg/video/sex+with+anyone+wedding+edition+2',
+ 'only_matching': True,
+ }, {
+ # mobile page
+ 'url': 'http://m.spankbang.com/1o2de/video/can+t+remember+her+name',
+ 'only_matching': True,
+ }, {
+ # 4k
+ 'url': 'https://spankbang.com/1vwqx/video/jade+kush+solo+4k',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play/fantasy+solo/480p/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.spankbang.com/3vvn/play',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://spankbang.com/2y3td/embed/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(
+ url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
+ video_id, headers={'Cookie': 'country=US'})
+
+ if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage):
+ raise ExtractorError(
+ 'Video %s is not available' % video_id, expected=True)
+
+ formats = []
+
+ def extract_format(format_id, format_url):
+ f_url = url_or_none(format_url)
+ if not f_url:
+ return
+ f = parse_resolution(format_id)
+ ext = determine_ext(f_url)
+ if format_id.startswith('m3u8') or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ f_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif format_id.startswith('mpd') or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ f_url, video_id, mpd_id='dash', fatal=False))
+ elif ext == 'mp4' or f.get('width') or f.get('height'):
+ f.update({
+ 'url': f_url,
+ 'format_id': format_id,
+ })
+ formats.append(f)
+
+ STREAM_URL_PREFIX = 'stream_url_'
+
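+ # The watch page defines JS variables of the form stream_url_<quality> = '<url>'; harvest each of them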
+ for mobj in re.finditer(
+ r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2'
+ % STREAM_URL_PREFIX, webpage):
+ extract_format(*mobj.group('id', 'url'))
+
+ if not formats:
+ stream_key = self._search_regex(
+ r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ webpage, 'stream key', group='value')
+
+ stream = self._download_json(
+ 'https://spankbang.com/api/videos/stream', video_id,
+ 'Downloading stream JSON', data=urlencode_postdata({
+ 'id': stream_key,
+ 'data': 0,
+ }), headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ })
+
+ for format_id, format_url in stream.items():
+ if format_url and isinstance(format_url, list):
+ format_url = format_url[0]
+ extract_format(format_id, format_url)
+
+ self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
+
+ info = self._search_json_ld(webpage, video_id, default={})
+
+ title = self._html_search_regex(
+ r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title', default=None)
+ description = self._search_regex(
+ r'<div[^>]+\bclass=["\']bottom[^>]+>\s*<p>[^<]*</p>\s*<p>([^<]+)',
+ webpage, 'description', default=None)
+ thumbnail = self._og_search_thumbnail(webpage, default=None)
+ uploader = self._html_search_regex(
+ (r'(?s)<li[^>]+class=["\']profile[^>]+>(.+?)</a>',
+ r'class="user"[^>]*><img[^>]+>([^<]+)'),
+ webpage, 'uploader', default=None)
+ duration = parse_duration(self._search_regex(
+ r'<div[^>]+\bclass=["\']right_side[^>]+>\s*<span>([^<]+)',
+ webpage, 'duration', default=None))
+ view_count = str_to_int(self._search_regex(
+ r'([\d,.]+)\s+plays', webpage, 'view count', default=None))
+
+ age_limit = self._rta_search(webpage)
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': title or video_id,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'age_limit': age_limit,
+ }, info)
+
+
+class SpankBangPlaylistIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
+ _TEST = {
+ 'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
+ 'info_dict': {
+ 'id': 'ug0k',
+ 'title': 'Big Ass Titties',
+ },
+ 'playlist_mincount': 50,
+ }
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})
+
+ entries = [self.url_result(
+ 'https://spankbang.com/%s/video' % video_id,
+ ie=SpankBangIE.ie_key(), video_id=video_id)
+ for video_id in orderedSet(re.findall(
+ r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
+
+ title = self._html_search_regex(
+ r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
+ fatal=False)
+
+ return self.playlist_result(entries, playlist_id, title)
diff --git a/youtube_dlc/extractor/spankwire.py b/youtube_dlc/extractor/spankwire.py
new file mode 100644
index 000000000..35ab9ec37
--- /dev/null
+++ b/youtube_dlc/extractor/spankwire.py
@@ -0,0 +1,182 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ int_or_none,
+ merge_dicts,
+ str_or_none,
+ str_to_int,
+ url_or_none,
+)
+
+
+class SpankwireIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?spankwire\.com/
+ (?:
+ [^/]+/video|
+ EmbedPlayer\.aspx/?\?.*?\bArticleId=
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [{
+ # download URL pattern: */<height>P_<tbr>K_<video_id>.mp4
+ 'url': 'http://www.spankwire.com/Buckcherry-s-X-Rated-Music-Video-Crazy-Bitch/video103545/',
+ 'md5': '5aa0e4feef20aad82cbcae3aed7ab7cd',
+ 'info_dict': {
+ 'id': '103545',
+ 'ext': 'mp4',
+ 'title': 'Buckcherry`s X Rated Music Video Crazy Bitch',
+ 'description': 'Crazy Bitch X rated music video.',
+ 'duration': 222,
+ 'uploader': 'oreusz',
+ 'uploader_id': '124697',
+ 'timestamp': 1178587885,
+ 'upload_date': '20070508',
+ 'average_rating': float,
+ 'view_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ 'categories': list,
+ 'tags': list,
+ },
+ }, {
+ # download URL pattern: */mp4_<format_id>_<video_id>.mp4
+ 'url': 'http://www.spankwire.com/Titcums-Compiloation-I/video1921551/',
+ 'md5': '09b3c20833308b736ae8902db2f8d7e6',
+ 'info_dict': {
+ 'id': '1921551',
+ 'ext': 'mp4',
+ 'title': 'Titcums Compiloation I',
+ 'description': 'cum on tits',
+ 'uploader': 'dannyh78999',
+ 'uploader_id': '3056053',
+ 'upload_date': '20150822',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'proxy': '127.0.0.1:8118'
+ },
+ 'skip': 'removed',
+ }, {
+ 'url': 'https://www.spankwire.com/EmbedPlayer.aspx/?ArticleId=156156&autostart=true',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?spankwire\.com/EmbedPlayer\.aspx/?\?.*?\bArticleId=\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://www.spankwire.com/api/video/%s.json' % video_id, video_id)
+
+ title = video['title']
+
+ formats = []
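+ # 'videos' appears to map quality labels (e.g. '720P') to progressive URLs; height/tbr can often be recovered from the URL path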
+ videos = video.get('videos')
+ if isinstance(videos, dict):
+ for format_id, format_url in videos.items():
+ video_url = url_or_none(format_url)
+ if not video_url:
+ continue
+ height = int_or_none(self._search_regex(
+ r'(\d+)[pP]', format_id, 'height', default=None))
+ m = re.search(
+ r'/(?P<height>\d+)[pP]_(?P<tbr>\d+)[kK]', video_url)
+ if m:
+ tbr = int(m.group('tbr'))
+ height = height or int(m.group('height'))
+ else:
+ tbr = None
+ formats.append({
+ 'url': video_url,
+ 'format_id': '%dp' % height if height else format_id,
+ 'height': height,
+ 'tbr': tbr,
+ })
+ m3u8_url = url_or_none(video.get('HLS'))
+ if m3u8_url:
+ formats.extend(self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats, ('height', 'tbr', 'width', 'format_id'))
+
+ view_count = str_to_int(video.get('viewed'))
+
+ thumbnails = []
+ for preference, t in enumerate(('', '2x'), start=0):
+ thumbnail_url = url_or_none(video.get('poster%s' % t))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': preference,
+ })
+
+ def extract_names(key):
+ entries_list = video.get(key)
+ if not isinstance(entries_list, list):
+ return
+ entries = []
+ for entry in entries_list:
+ name = str_or_none(entry.get('name'))
+ if name:
+ entries.append(name)
+ return entries
+
+ categories = extract_names('categories')
+ tags = extract_names('tags')
+
+ uploader = None
+ info = {}
+
+ webpage = self._download_webpage(
+ 'https://www.spankwire.com/_/video%s/' % video_id, video_id,
+ fatal=False)
+ if webpage:
+ info = self._search_json_ld(webpage, video_id, default={})
+ thumbnail_url = None
+ if 'thumbnail' in info:
+ thumbnail_url = url_or_none(info['thumbnail'])
+ del info['thumbnail']
+ if not thumbnail_url:
+ thumbnail_url = self._og_search_thumbnail(webpage)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': 10,
+ })
+ uploader = self._html_search_regex(
+ r'(?s)by\s*<a[^>]+\bclass=["\']uploaded__by[^>]*>(.+?)</a>',
+ webpage, 'uploader', fatal=False)
+ if not view_count:
+ view_count = str_to_int(self._search_regex(
+ r'data-views=["\']([\d,.]+)', webpage, 'view count',
+ fatal=False))
+
+ return merge_dicts({
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'duration': int_or_none(video.get('duration')),
+ 'thumbnails': thumbnails,
+ 'uploader': uploader,
+ 'uploader_id': str_or_none(video.get('userId')),
+ 'timestamp': int_or_none(video.get('time_approved_on')),
+ 'average_rating': float_or_none(video.get('rating')),
+ 'view_count': view_count,
+ 'comment_count': int_or_none(video.get('comments')),
+ 'age_limit': 18,
+ 'categories': categories,
+ 'tags': tags,
+ 'formats': formats,
+ }, info)
diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dlc/extractor/spiegel.py
index 4df7f4ddc..4df7f4ddc 100644
--- a/youtube_dl/extractor/spiegel.py
+++ b/youtube_dlc/extractor/spiegel.py
diff --git a/youtube_dl/extractor/spiegeltv.py b/youtube_dlc/extractor/spiegeltv.py
index 6ccf4c342..6ccf4c342 100644
--- a/youtube_dl/extractor/spiegeltv.py
+++ b/youtube_dlc/extractor/spiegeltv.py
diff --git a/youtube_dlc/extractor/spike.py b/youtube_dlc/extractor/spike.py
new file mode 100644
index 000000000..aabff7a3c
--- /dev/null
+++ b/youtube_dlc/extractor/spike.py
@@ -0,0 +1,55 @@
+from __future__ import unicode_literals
+
+from .mtv import MTVServicesInfoExtractor
+
+
+class BellatorIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?bellator\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.bellator.com/fight/atwr7k/bellator-158-michael-page-vs-evangelista-cyborg',
+ 'info_dict': {
+ 'title': 'Michael Page vs. Evangelista Cyborg',
+ 'description': 'md5:0d917fc00ffd72dd92814963fc6cbb05',
+ },
+ 'playlist_count': 3,
+ }, {
+ 'url': 'http://www.bellator.com/video-clips/bw6k7n/bellator-158-foundations-michael-venom-page',
+ 'only_matching': True,
+ }]
+
+ _FEED_URL = 'http://www.bellator.com/feeds/mrss/'
+ _GEO_COUNTRIES = ['US']
+
+ def _extract_mgid(self, webpage):
+ return self._extract_triforce_mgid(webpage)
+
+
+class ParamountNetworkIE(MTVServicesInfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?paramountnetwork\.com/[^/]+/[\da-z]{6}(?:[/?#&]|$)'
+ _TESTS = [{
+ 'url': 'http://www.paramountnetwork.com/episodes/j830qm/lip-sync-battle-joel-mchale-vs-jim-rash-season-2-ep-13',
+ 'info_dict': {
+ 'id': '37ace3a8-1df6-48be-85b8-38df8229e241',
+ 'ext': 'mp4',
+ 'title': 'Lip Sync Battle|April 28, 2016|2|209|Joel McHale Vs. Jim Rash|Act 1',
+ 'description': 'md5:a739ca8f978a7802f67f8016d27ce114',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }]
+
+ _FEED_URL = 'http://www.paramountnetwork.com/feeds/mrss/'
+ _GEO_COUNTRIES = ['US']
+
+ def _extract_mgid(self, webpage):
+ root_data = self._parse_json(self._search_regex(
+ r'window\.__DATA__\s*=\s*({.+})',
+ webpage, 'data'), None)
+
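+ # window.__DATA__ nests page modules as trees of 'children'; drill down MainContainer -> VideoPlayer to reach the mgid URI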
+ def find_sub_data(data, data_type):
+ return next(c for c in data['children'] if c.get('type') == data_type)
+
+ c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer')
+ return c['props']['media']['video']['config']['uri']
diff --git a/youtube_dl/extractor/sport5.py b/youtube_dlc/extractor/sport5.py
index a417b5a4e..a417b5a4e 100644
--- a/youtube_dl/extractor/sport5.py
+++ b/youtube_dlc/extractor/sport5.py
diff --git a/youtube_dl/extractor/sportbox.py b/youtube_dlc/extractor/sportbox.py
index b9017fd2a..b9017fd2a 100644
--- a/youtube_dl/extractor/sportbox.py
+++ b/youtube_dlc/extractor/sportbox.py
diff --git a/youtube_dlc/extractor/sportdeutschland.py b/youtube_dlc/extractor/sportdeutschland.py
new file mode 100644
index 000000000..378fc7568
--- /dev/null
+++ b/youtube_dlc/extractor/sportdeutschland.py
@@ -0,0 +1,82 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ sanitized_Request,
+)
+
+
+class SportDeutschlandIE(InfoExtractor):
+ _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
+ _TESTS = [{
+ 'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
+ 'info_dict': {
+ 'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
+ 'ext': 'mp4',
+ 'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
+ 'categories': ['Badminton-Deutschland'],
+ 'view_count': int,
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'timestamp': int,
+ 'upload_date': '20200201',
+ 'description': 're:.*', # meaningless description for THIS video
+ },
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ sport_id = mobj.group('sport')
+
+ api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
+ sport_id, video_id)
+ req = sanitized_Request(api_url, headers={
+ 'Accept': 'application/vnd.vidibus.v2.html+json',
+ 'Referer': url,
+ })
+ data = self._download_json(req, video_id)
+
+ asset = data['asset']
+ categories = [data['section']['title']]
+
+ formats = []
+ smil_url = asset['video']
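+ # The asset URL may point at a SMIL manifest; an HLS (.m3u8) rendition usually exists at the same path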
+ if '.smil' in smil_url:
+ m3u8_url = smil_url.replace('.smil', '.m3u8')
+ formats.extend(
+ self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
+
+ smil_doc = self._download_xml(
+ smil_url, video_id, note='Downloading SMIL metadata')
+ base_url_el = smil_doc.find('./head/meta')
+ if base_url_el is not None: # childless Elements are falsy, so compare to None explicitly
+ base_url = base_url_el.attrib['base']
+ formats.extend([{
+ 'format_id': 'rtmp',
+ 'url': base_url if base_url_el is not None else n.attrib['src'],
+ 'play_path': n.attrib['src'],
+ 'ext': 'flv',
+ 'preference': -100,
+ 'format_note': 'Seems to fail at example stream',
+ } for n in smil_doc.findall('./body/video')])
+ else:
+ formats.append({'url': smil_url})
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': asset['title'],
+ 'thumbnail': asset.get('image'),
+ 'description': asset.get('teaser'),
+ 'duration': asset.get('duration'),
+ 'categories': categories,
+ 'view_count': asset.get('views'),
+ 'rtmp_live': asset.get('live'),
+ 'timestamp': parse_iso8601(asset.get('date')),
+ }
diff --git a/youtube_dl/extractor/springboardplatform.py b/youtube_dlc/extractor/springboardplatform.py
index 07d99b579..07d99b579 100644
--- a/youtube_dl/extractor/springboardplatform.py
+++ b/youtube_dlc/extractor/springboardplatform.py
diff --git a/youtube_dl/extractor/sprout.py b/youtube_dlc/extractor/sprout.py
index 8467bf49d..8467bf49d 100644
--- a/youtube_dl/extractor/sprout.py
+++ b/youtube_dlc/extractor/sprout.py
diff --git a/youtube_dl/extractor/srgssr.py b/youtube_dlc/extractor/srgssr.py
index 170dce87f..170dce87f 100644
--- a/youtube_dl/extractor/srgssr.py
+++ b/youtube_dlc/extractor/srgssr.py
diff --git a/youtube_dlc/extractor/srmediathek.py b/youtube_dlc/extractor/srmediathek.py
new file mode 100644
index 000000000..359dadaa3
--- /dev/null
+++ b/youtube_dlc/extractor/srmediathek.py
@@ -0,0 +1,59 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .ard import ARDMediathekBaseIE
+from ..utils import (
+ ExtractorError,
+ get_element_by_attribute,
+)
+
+
+class SRMediathekIE(ARDMediathekBaseIE):
+ IE_NAME = 'sr:mediathek'
+ IE_DESC = 'Saarländischer Rundfunk'
+ _VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
+
+ _TESTS = [{
+ 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
+ 'info_dict': {
+ 'id': '28455',
+ 'ext': 'mp4',
+ 'title': 'sportarena (26.10.2014)',
+ 'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ },
+ 'skip': 'no longer available',
+ }, {
+ 'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682',
+ 'info_dict': {
+ 'id': '37682',
+ 'ext': 'mp4',
+ 'title': 'Love, Cakes and Rock\'n\'Roll',
+ 'description': 'md5:18bf9763631c7d326c22603681e1123d',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ if '>Der gew&uuml;nschte Beitrag ist leider nicht mehr verf&uuml;gbar.<' in webpage:
+ raise ExtractorError('Video %s is no longer available' % video_id, expected=True)
+
+ media_collection_url = self._search_regex(
+ r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url')
+ info = self._extract_media_info(media_collection_url, webpage, video_id)
+ info.update({
+ 'id': video_id,
+ 'title': get_element_by_attribute('class', 'ardplayer-title', webpage),
+ 'description': self._og_search_description(webpage),
+ 'thumbnail': self._og_search_thumbnail(webpage),
+ })
+ return info
diff --git a/youtube_dl/extractor/stanfordoc.py b/youtube_dlc/extractor/stanfordoc.py
index ae3dd1380..ae3dd1380 100644
--- a/youtube_dl/extractor/stanfordoc.py
+++ b/youtube_dlc/extractor/stanfordoc.py
diff --git a/youtube_dl/extractor/steam.py b/youtube_dlc/extractor/steam.py
index a6a191ceb..a6a191ceb 100644
--- a/youtube_dl/extractor/steam.py
+++ b/youtube_dlc/extractor/steam.py
diff --git a/youtube_dl/extractor/stitcher.py b/youtube_dlc/extractor/stitcher.py
index 97d1ff681..97d1ff681 100644
--- a/youtube_dl/extractor/stitcher.py
+++ b/youtube_dlc/extractor/stitcher.py
diff --git a/youtube_dlc/extractor/storyfire.py b/youtube_dlc/extractor/storyfire.py
new file mode 100644
index 000000000..67457cc94
--- /dev/null
+++ b/youtube_dlc/extractor/storyfire.py
@@ -0,0 +1,255 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+from .common import InfoExtractor
+
+
+class StoryFireIE(InfoExtractor):
+ _VALID_URL = r'(?:(?:https?://(?:www\.)?storyfire\.com/video-details)|(?:https://storyfire\.app\.link))/(?P<id>[^/\s]+)'
+ _TESTS = [{
+ 'url': 'https://storyfire.com/video-details/5df1d132b6378700117f9181',
+ 'md5': '560953bfca81a69003cfa5e53ac8a920',
+ 'info_dict': {
+ 'id': '5df1d132b6378700117f9181',
+ 'ext': 'mp4',
+ 'title': 'Buzzfeed Teaches You About Memes',
+ 'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
+ 'timestamp': 1576129028,
+ 'description': 'Mocking Buzzfeed\'s meme lesson. Reuploaded from YouTube because of their new policies',
+ 'uploader': 'whang!',
+ 'upload_date': '20191212',
+ },
+ 'params': {'format': 'bestvideo'} # There are no merged formats in the playlist.
+ }, {
+ 'url': 'https://storyfire.app.link/5GxAvWOQr8', # Alternate URL format, with unrelated short ID
+ 'md5': '7a2dc6d60c4889edfed459c620fe690d',
+ 'info_dict': {
+ 'id': '5f1e11ecd78a57b6c702001d',
+ 'ext': 'm4a',
+ 'title': 'Weird Nintendo Prototype Leaks',
+ 'description': 'A stream taking a look at some weird Nintendo Prototypes with Luigi in Mario 64 and weird Yoshis',
+ 'timestamp': 1595808576,
+ 'upload_date': '20200727',
+ 'uploader': 'whang!',
+ 'uploader_id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
+ },
+ 'params': {'format': 'bestaudio'} # Verifying audio extraction
+
+ }]
+
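+ # The HLS master omits codec/bitrate details for these audio-only renditions; the values below were observed empirically and may drift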
+ _aformats = {
+ 'audio-medium-audio': {'acodec': 'aac', 'abr': 125, 'preference': -10},
+ 'audio-high-audio': {'acodec': 'aac', 'abr': 254, 'preference': -1},
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ # The embedded __NEXT_DATA__ json blob is required; extraction cannot proceed without it.
+ jsontext = self._html_search_regex(
+ r'<script id="__NEXT_DATA__" type="application/json">(.+?)</script>',
+ webpage, 'json_data')
+
+ json = self._parse_json(jsontext, video_id)
+
+ # The currentVideo field in the json is mandatory
+ # because it contains the only link to the m3u8 playlist
+ video = json['props']['initialState']['video']['currentVideo']
+ videourl = video['vimeoVideoURL'] # Video URL is mandatory
+
+ # Extract the remaining fields from the json in an error-tolerant fashion.
+ # The parsed ID may be wrong for short URLs, so correct it from the json.
+ parsed_id = video.get('_id')
+ if parsed_id:
+ video_id = parsed_id
+
+ title = video.get('title')
+ description = video.get('description')
+
+ thumbnail = video.get('storyImage')
+ views = video.get('views')
+ likes = video.get('likesCount')
+ comments = video.get('commentsCount')
+ duration = video.get('videoDuration')
+ publishdate = video.get('publishDate') # Apparently epoch time, day only
+
+ uploader = video.get('username')
+ uploader_id = video.get('hostID')
+ # Construct an uploader URL
+ uploader_url = None
+ if uploader_id:
+ uploader_url = "https://storyfire.com/user/%s/video" % uploader_id
+
+ # Collect root playlist to determine formats
+ formats = self._extract_m3u8_formats(
+ videourl, video_id, 'mp4', 'm3u8_native')
+
+ # Modify formats to fill in missing information about audio codecs
+ for fmt in formats:
+ aformat = self._aformats.get(fmt['format_id'])
+ if aformat:
+ fmt['acodec'] = aformat['acodec']
+ fmt['abr'] = aformat['abr']
+ fmt['preference'] = aformat['preference']
+ fmt['ext'] = 'm4a'
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'ext': "mp4",
+ 'url': videourl,
+ 'formats': formats,
+
+ 'thumbnail': thumbnail,
+ 'view_count': views,
+ 'like_count': likes,
+ 'comment_count': comments,
+ 'duration': duration,
+ 'timestamp': publishdate,
+
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+
+ }
+
+
+class StoryFireUserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?storyfire\.com/user/(?P<id>[^/\s]+)/video'
+ _TESTS = [{
+ 'url': 'https://storyfire.com/user/ntZAJFECERSgqHSxzonV5K2E89s1/video',
+ 'info_dict': {
+ 'id': 'ntZAJFECERSgqHSxzonV5K2E89s1',
+ 'title': 'whang!',
+ },
+ 'playlist_mincount': 18
+ }, {
+ 'url': 'https://storyfire.com/user/UQ986nFxmAWIgnkZQ0ftVhq4nOk2/video',
+ 'info_dict': {
+ 'id': 'UQ986nFxmAWIgnkZQ0ftVhq4nOk2',
+ 'title': 'McJuggerNuggets',
+ },
+ 'playlist_mincount': 143
+
+ }]
+
+ # Generator for fetching playlist items
+ def _enum_videos(self, baseurl, user_id, firstjson):
+ totalVideos = int(firstjson['videosCount'])
+ haveVideos = 0
+ json = firstjson
+
+ for page in itertools.count(1):
+ for video in json['videos']:
+ id = video['_id']
+ url = "https://storyfire.com/video-details/%s" % id
+ haveVideos += 1
+ yield {
+ '_type': 'url',
+ 'id': id,
+ 'url': url,
+ 'ie_key': 'StoryFire',
+
+ 'title': video.get('title'),
+ 'description': video.get('description'),
+ 'view_count': video.get('views'),
+ 'comment_count': video.get('commentsCount'),
+ 'duration': video.get('videoDuration'),
+ 'timestamp': video.get('publishDate'),
+ }
+ # Are there more pages we could fetch?
+ if haveVideos < totalVideos:
+ pageurl = baseurl + ("%i" % haveVideos)
+ json = self._download_json(pageurl, user_id,
+ note='Downloading page %s' % page)
+
+ # Are there any videos in the new json?
+ videos = json.get('videos')
+ if not videos:
+ break # no videos
+
+ else:
+ break # We have fetched all the videos, stop
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+
+ baseurl = "https://storyfire.com/app/publicVideos/%s?skip=" % user_id
+
+ # Download first page to ensure it can be downloaded, and get user information if available.
+ firstpage = baseurl + "0"
+ firstjson = self._download_json(firstpage, user_id)
+
+ title = None
+ videos = firstjson.get('videos')
+ if videos:
+ title = videos[0].get('username') # any entry should carry the uploader's name; [0] avoids an IndexError on single-video users
+
+ return {
+ '_type': 'playlist',
+ 'entries': self._enum_videos(baseurl, user_id, firstjson),
+ 'id': user_id,
+ 'title': title,
+ }
+
+
+class StoryFireSeriesIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?storyfire\.com/write/series/stories/(?P<id>[^/\s]+)'
+ _TESTS = [{
+ 'url': 'https://storyfire.com/write/series/stories/-Lq6MsuIHLODO6d2dDkr/',
+ 'info_dict': {
+ 'id': '-Lq6MsuIHLODO6d2dDkr',
+ },
+ 'playlist_mincount': 13
+ }, {
+ 'url': 'https://storyfire.com/write/series/stories/the_mortal_one/',
+ 'info_dict': {
+ 'id': 'the_mortal_one',
+ },
+ 'playlist_count': 0 # This series has stories, but none of them has a video.
+ }, {
+ 'url': 'https://storyfire.com/write/series/stories/story_time',
+ 'info_dict': {
+ 'id': 'story_time',
+ },
+ 'playlist_mincount': 10
+ }]
+
+ # Generator for returning playlist items
+ # The json here differs substantially from the one on the user videos page above
+ def _enum_videos(self, jsonlist):
+ for video in jsonlist:
+ id = video['_id']
+ if video.get('hasVideo'): # Boolean element
+ url = "https://storyfire.com/video-details/%s" % id
+ yield {
+ '_type': 'url',
+ 'id': id,
+ 'url': url,
+ 'ie_key': 'StoryFire',
+
+ 'title': video.get('title'),
+ 'description': video.get('description'),
+ 'view_count': video.get('views'),
+ 'likes_count': video.get('likesCount'),
+ 'comment_count': video.get('commentsCount'),
+ 'duration': video.get('videoDuration'),
+ 'timestamp': video.get('publishDate'),
+ }
+
+ def _real_extract(self, url):
+ list_id = self._match_id(url)
+
+ listurl = "https://storyfire.com/app/seriesStories/%s/list" % list_id
+ json = self._download_json(listurl, list_id)
+
+ return {
+ '_type': 'playlist',
+ 'entries': self._enum_videos(json),
+ 'id': list_id
+ }
diff --git a/youtube_dl/extractor/streamable.py b/youtube_dlc/extractor/streamable.py
index 34725274e..34725274e 100644
--- a/youtube_dl/extractor/streamable.py
+++ b/youtube_dlc/extractor/streamable.py
diff --git a/youtube_dlc/extractor/streamcloud.py b/youtube_dlc/extractor/streamcloud.py
new file mode 100644
index 000000000..32eb2b92d
--- /dev/null
+++ b/youtube_dlc/extractor/streamcloud.py
@@ -0,0 +1,78 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ urlencode_postdata,
+)
+
+
+class StreamcloudIE(InfoExtractor):
+ IE_NAME = 'streamcloud.eu'
+ _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)(?:/(?P<fname>[^#?]*)\.html)?'
+
+ _TESTS = [{
+ 'url': 'http://streamcloud.eu/skp9j99s4bpz/youtube-dlc_test_video_____________-BaW_jenozKc.mp4.html',
+ 'md5': '6bea4c7fa5daaacc2a946b7146286686',
+ 'info_dict': {
+ 'id': 'skp9j99s4bpz',
+ 'ext': 'mp4',
+ 'title': 'youtube-dlc test video \'/\\ ä ↭',
+ },
+ 'skip': 'Only available from the EU'
+ }, {
+ 'url': 'http://streamcloud.eu/ua8cmfh1nbe6/NSHIP-148--KUC-NG--H264-.mp4.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ url = 'http://streamcloud.eu/%s' % video_id
+
+ orig_webpage = self._download_webpage(url, video_id)
+
+ if '>File Not Found<' in orig_webpage:
+ raise ExtractorError(
+ 'Video %s does not exist' % video_id, expected=True)
+
+ fields = re.findall(r'''(?x)<input\s+
+ type="(?:hidden|submit)"\s+
+ name="([^"]+)"\s+
+ (?:id="[^"]+"\s+)?
+ value="([^"]*)"
+ ''', orig_webpage)
+
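+ # The site appears to enforce a short countdown before the form can be submitted; wait it out so the POST is accepted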
+ self._sleep(6, video_id)
+
+ webpage = self._download_webpage(
+ url, video_id, data=urlencode_postdata(fields), headers={
+ b'Content-Type': b'application/x-www-form-urlencoded',
+ })
+
+ try:
+ title = self._html_search_regex(
+ r'<h1[^>]*>([^<]+)<', webpage, 'title')
+ video_url = self._search_regex(
+ r'file:\s*"([^"]+)"', webpage, 'video URL')
+ except ExtractorError:
+ message = self._html_search_regex(
+ r'(?s)<div[^>]+class=(["\']).*?msgboxinfo.*?\1[^>]*>(?P<message>.+?)</div>',
+ webpage, 'message', default=None, group='message')
+ if message:
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True)
+ raise
+ thumbnail = self._search_regex(
+ r'image:\s*"([^"]+)"', webpage, 'thumbnail URL', fatal=False)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'url': video_url,
+ 'thumbnail': thumbnail,
+ 'http_headers': {
+ 'Referer': url,
+ },
+ }
diff --git a/youtube_dl/extractor/streamcz.py b/youtube_dlc/extractor/streamcz.py
index 58e0b4c80..58e0b4c80 100644
--- a/youtube_dl/extractor/streamcz.py
+++ b/youtube_dlc/extractor/streamcz.py
diff --git a/youtube_dl/extractor/streetvoice.py b/youtube_dlc/extractor/streetvoice.py
index 91612c7f2..91612c7f2 100644
--- a/youtube_dl/extractor/streetvoice.py
+++ b/youtube_dlc/extractor/streetvoice.py
diff --git a/youtube_dlc/extractor/stretchinternet.py b/youtube_dlc/extractor/stretchinternet.py
new file mode 100644
index 000000000..4dbead2ba
--- /dev/null
+++ b/youtube_dlc/extractor/stretchinternet.py
@@ -0,0 +1,32 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import int_or_none
+
+
+class StretchInternetIE(InfoExtractor):
+ _VALID_URL = r'https?://portal\.stretchinternet\.com/[^/]+/(?:portal|full)\.htm\?.*?\beventId=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://portal.stretchinternet.com/umary/portal.htm?eventId=573272&streamType=video',
+ 'info_dict': {
+ 'id': '573272',
+ 'ext': 'mp4',
+ 'title': 'University of Mary Wrestling vs. Upper Iowa',
+ 'timestamp': 1575668361,
+ 'upload_date': '20191206',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ event = self._download_json(
+ 'https://api.stretchinternet.com/trinity/event/tcg/' + video_id,
+ video_id)[0]
+
+ return {
+ 'id': video_id,
+ 'title': event['title'],
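+ # dateCreated looks like epoch milliseconds, hence the 1000 scale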
+ 'timestamp': int_or_none(event.get('dateCreated'), 1000),
+ 'url': 'https://' + event['media'][0]['url'],
+ }
diff --git a/youtube_dlc/extractor/stv.py b/youtube_dlc/extractor/stv.py
new file mode 100644
index 000000000..bae8b71f4
--- /dev/null
+++ b/youtube_dlc/extractor/stv.py
@@ -0,0 +1,67 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ float_or_none,
+ int_or_none,
+)
+
+
+class STVPlayerIE(InfoExtractor):
+ IE_NAME = 'stv:player'
+ _VALID_URL = r'https?://player\.stv\.tv/(?P<type>episode|video)/(?P<id>[a-z0-9]{4})'
+ _TEST = {
+ 'url': 'https://player.stv.tv/video/4gwd/emmerdale/60-seconds-on-set-with-laura-norton/',
+ 'md5': '5adf9439c31d554f8be0707c7abe7e0a',
+ 'info_dict': {
+ 'id': '5333973339001',
+ 'ext': 'mp4',
+ 'upload_date': '20170301',
+ 'title': '60 seconds on set with Laura Norton',
+ 'description': "How many questions can Laura - a.k.a Kerry Wyatt - answer in 60 seconds? Let\'s find out!",
+ 'timestamp': 1488388054,
+ 'uploader_id': '1486976045',
+ },
+ 'skip': 'this resource is unavailable outside of the UK',
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1486976045/default_default/index.html?videoId=%s'
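+ # Maps the URL path type to the corresponding STV API resource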
+ _PTYPE_MAP = {
+ 'episode': 'episodes',
+ 'video': 'shortform',
+ }
+
+ def _real_extract(self, url):
+ ptype, video_id = re.match(self._VALID_URL, url).groups()
+ resp = self._download_json(
+ 'https://player.api.stv.tv/v1/%s/%s' % (self._PTYPE_MAP[ptype], video_id),
+ video_id)
+
+ result = resp['results']
+ video = result['video']
+ video_id = compat_str(video['id'])
+
+ subtitles = {}
+ _subtitles = result.get('_subtitles') or {}
+ for ext, sub_url in _subtitles.items():
+ subtitles.setdefault('en', []).append({
+ 'ext': 'vtt' if ext == 'webvtt' else ext,
+ 'url': sub_url,
+ })
+
+ programme = result.get('programme') or {}
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': self.BRIGHTCOVE_URL_TEMPLATE % video_id,
+ 'description': result.get('summary'),
+ 'duration': float_or_none(video.get('length'), 1000),
+ 'subtitles': subtitles,
+ 'view_count': int_or_none(result.get('views')),
+ 'series': programme.get('name') or programme.get('shortName'),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/youtube_dl/extractor/sunporno.py b/youtube_dlc/extractor/sunporno.py
index 68051169b..68051169b 100644
--- a/youtube_dl/extractor/sunporno.py
+++ b/youtube_dlc/extractor/sunporno.py
diff --git a/youtube_dl/extractor/sverigesradio.py b/youtube_dlc/extractor/sverigesradio.py
index aa0691f0d..aa0691f0d 100644
--- a/youtube_dl/extractor/sverigesradio.py
+++ b/youtube_dlc/extractor/sverigesradio.py
diff --git a/youtube_dlc/extractor/svt.py b/youtube_dlc/extractor/svt.py
new file mode 100644
index 000000000..e12389cad
--- /dev/null
+++ b/youtube_dlc/extractor/svt.py
@@ -0,0 +1,380 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ dict_get,
+ int_or_none,
+ str_or_none,
+ strip_or_none,
+ try_get,
+)
+
+
+class SVTBaseIE(InfoExtractor):
+ _GEO_COUNTRIES = ['SE']
+
+ def _extract_video(self, video_info, video_id):
+ is_live = dict_get(video_info, ('live', 'simulcast'), default=False)
+ m3u8_protocol = 'm3u8' if is_live else 'm3u8_native'
+ formats = []
+ for vr in video_info['videoReferences']:
+ player_type = vr.get('playerType') or vr.get('format')
+ vurl = vr['url']
+ ext = determine_ext(vurl)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ vurl, video_id,
+ ext='mp4', entry_protocol=m3u8_protocol,
+ m3u8_id=player_type, fatal=False))
+ elif ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ vurl + '?hdcore=3.3.0', video_id,
+ f4m_id=player_type, fatal=False))
+ elif ext == 'mpd':
+ if player_type == 'dashhbbtv':
+ formats.extend(self._extract_mpd_formats(
+ vurl, video_id, mpd_id=player_type, fatal=False))
+ else:
+ formats.append({
+ 'format_id': player_type,
+ 'url': vurl,
+ })
+ if not formats and video_info.get('rights', {}).get('geoBlockedSweden'):
+ self.raise_geo_restricted(
+ 'This video is only available in Sweden',
+ countries=self._GEO_COUNTRIES)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ subtitle_references = dict_get(video_info, ('subtitles', 'subtitleReferences'))
+ if isinstance(subtitle_references, list):
+ for sr in subtitle_references:
+ subtitle_url = sr.get('url')
+ subtitle_lang = sr.get('language', 'sv')
+ if subtitle_url:
+ if determine_ext(subtitle_url) == 'm3u8':
+ # TODO(yan12125): handle WebVTT in m3u8 manifests
+ continue
+
+ subtitles.setdefault(subtitle_lang, []).append({'url': subtitle_url})
+
+ title = video_info.get('title')
+
+ series = video_info.get('programTitle')
+ season_number = int_or_none(video_info.get('season'))
+ episode = video_info.get('episodeTitle')
+ episode_number = int_or_none(video_info.get('episodeNumber'))
+
+ duration = int_or_none(dict_get(video_info, ('materialLength', 'contentDuration')))
+ age_limit = None
+ adult = dict_get(
+ video_info, ('inappropriateForChildren', 'blockedForChildren'),
+ skip_false_values=False)
+ if adult is not None:
+ age_limit = 18 if adult else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'is_live': is_live,
+ }
+
+
+class SVTIE(SVTBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.svt.se/wd?widgetId=23991&sectionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
+ 'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
+ 'info_dict': {
+ 'id': '2900353',
+ 'ext': 'mp4',
+ 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
+ 'duration': 27,
+ 'age_limit': 0,
+ },
+ }
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'(?:<iframe src|href)="(?P<url>%s[^"]*)"' % SVTIE._VALID_URL, webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ widget_id = mobj.group('widget_id')
+ article_id = mobj.group('id')
+
+ info = self._download_json(
+ 'http://www.svt.se/wd?widgetId=%s&articleId=%s&format=json&type=embed&output=json' % (widget_id, article_id),
+ article_id)
+
+ info_dict = self._extract_video(info['video'], article_id)
+ info_dict['title'] = info['context']['title']
+ return info_dict
+
+
+class SVTPlayBaseIE(SVTBaseIE):
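+ # Pages typically assign the player state as root['__svtplay'] = {...}; the regex captures that JSON object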
+ _SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
+
+
+class SVTPlayIE(SVTPlayBaseIE):
+ IE_DESC = 'SVT Play and Öppet arkiv'
+ _VALID_URL = r'''(?x)
+ (?:
+ svt:(?P<svt_id>[^/?#&]+)|
+ https?://(?:www\.)?(?:svtplay|oppetarkiv)\.se/(?:video|klipp|kanaler)/(?P<id>[^/?#&]+)
+ )
+ '''
+ _TESTS = [{
+ 'url': 'http://www.svtplay.se/video/5996901/flygplan-till-haile-selassie/flygplan-till-haile-selassie-2',
+ 'md5': '2b6704fe4a28801e1a098bbf3c5ac611',
+ 'info_dict': {
+ 'id': '5996901',
+ 'ext': 'mp4',
+ 'title': 'Flygplan till Haile Selassie',
+ 'duration': 3527,
+ 'thumbnail': r're:^https?://.*[\.-]jpg$',
+ 'age_limit': 0,
+ 'subtitles': {
+ 'sv': [{
+ 'ext': 'wsrt',
+ }]
+ },
+ },
+ }, {
+ # geo restricted to Sweden
+ 'url': 'http://www.oppetarkiv.se/video/5219710/trollflojten',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svtplay.se/kanaler/svt1',
+ 'only_matching': True,
+ }, {
+ 'url': 'svt:1376446-003A',
+ 'only_matching': True,
+ }, {
+ 'url': 'svt:14278044',
+ 'only_matching': True,
+ }]
+
+ def _adjust_title(self, info):
+ if info['is_live']:
+ info['title'] = self._live_title(info['title'])
+
+ def _extract_by_video_id(self, video_id, webpage=None):
+ data = self._download_json(
+ 'https://api.svt.se/videoplayer-api/video/%s' % video_id,
+ video_id, headers=self.geo_verification_headers())
+ info_dict = self._extract_video(data, video_id)
+ if not info_dict.get('title'):
+ title = dict_get(info_dict, ('episode', 'series'))
+ if not title and webpage:
+ title = re.sub(
+ r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
+ if not title:
+ title = video_id
+ info_dict['title'] = title
+ self._adjust_title(info_dict)
+ return info_dict
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, svt_id = mobj.group('id', 'svt_id')
+
+ if svt_id:
+ return self._extract_by_video_id(svt_id)
+
+ webpage = self._download_webpage(url, video_id)
+
+ data = self._parse_json(
+ self._search_regex(
+ self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
+ group='json'),
+ video_id, fatal=False)
+
+ thumbnail = self._og_search_thumbnail(webpage)
+
+ if data:
+ video_info = try_get(
+ data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
+ dict)
+ if video_info:
+ info_dict = self._extract_video(video_info, video_id)
+ info_dict.update({
+ 'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
+ 'thumbnail': thumbnail,
+ })
+ self._adjust_title(info_dict)
+ return info_dict
+
+ svt_id = self._search_regex(
+ r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
+ webpage, 'video id')
+
+ return self._extract_by_video_id(svt_id, webpage)
+
+
+class SVTSeriesIE(SVTPlayBaseIE):
+ _VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
+ _TESTS = [{
+ 'url': 'https://www.svtplay.se/rederiet',
+ 'info_dict': {
+ 'id': '14445680',
+ 'title': 'Rederiet',
+ 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
+ },
+ 'playlist_mincount': 318,
+ }, {
+ 'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
+ 'info_dict': {
+ 'id': 'season-2-14445680',
+ 'title': 'Rederiet - Säsong 2',
+ 'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
+ },
+ 'playlist_mincount': 12,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super(SVTSeriesIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ series_slug, season_id = re.match(self._VALID_URL, url).groups()
+
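+ # A single GraphQL query returns the series metadata together with every season's episode video ids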
+ series = self._download_json(
+ 'https://api.svt.se/contento/graphql', series_slug,
+ 'Downloading series page', query={
+ 'query': '''{
+ listablesBySlug(slugs: ["%s"]) {
+ associatedContent(include: [productionPeriod, season]) {
+ items {
+ item {
+ ... on Episode {
+ videoSvtId
+ }
+ }
+ }
+ id
+ name
+ }
+ id
+ longDescription
+ name
+ shortDescription
+ }
+}''' % series_slug,
+ })['data']['listablesBySlug'][0]
+
+ season_name = None
+
+ entries = []
+ for season in series['associatedContent']:
+ if not isinstance(season, dict):
+ continue
+ if season_id:
+ if season.get('id') != season_id:
+ continue
+ season_name = season.get('name')
+ items = season.get('items')
+ if not isinstance(items, list):
+ continue
+ for item in items:
+ video = item.get('item') or {}
+ content_id = video.get('videoSvtId')
+ if not content_id or not isinstance(content_id, compat_str):
+ continue
+ entries.append(self.url_result(
+ 'svt:' + content_id, SVTPlayIE.ie_key(), content_id))
+
+ title = series.get('name')
+ season_name = season_name or season_id
+
+ if title and season_name:
+ title = '%s - %s' % (title, season_name)
+ elif season_id:
+ title = season_id
+
+ return self.playlist_result(
+ entries, season_id or series.get('id'), title,
+ dict_get(series, ('longDescription', 'shortDescription')))
+
+
+class SVTPageIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?svt\.se/(?P<path>(?:[^/]+/)*(?P<id>[^/?&#]+))'
+ _TESTS = [{
+ 'url': 'https://www.svt.se/sport/ishockey/bakom-masken-lehners-kamp-mot-mental-ohalsa',
+ 'info_dict': {
+ 'id': '25298267',
+ 'title': 'Bakom masken – Lehners kamp mot mental ohälsa',
+ },
+ 'playlist_count': 4,
+ }, {
+ 'url': 'https://www.svt.se/nyheter/utrikes/svenska-andrea-ar-en-mil-fran-branderna-i-kalifornien',
+ 'info_dict': {
+ 'id': '24243746',
+ 'title': 'Svenska Andrea redo att fly sitt hem i Kalifornien',
+ },
+ 'playlist_count': 2,
+ }, {
+ # only programTitle
+ 'url': 'http://www.svt.se/sport/ishockey/jagr-tacklar-giroux-under-intervjun',
+ 'info_dict': {
+ 'id': '8439V2K',
+ 'ext': 'mp4',
+ 'title': 'Stjärnorna skojar till det - under SVT-intervjun',
+ 'duration': 27,
+ 'age_limit': 0,
+ },
+ }, {
+ 'url': 'https://www.svt.se/nyheter/lokalt/vast/svt-testar-tar-nagon-upp-skrapet-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.svt.se/vader/manadskronikor/maj2018',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if SVTIE.suitable(url) else super(SVTPageIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ path, display_id = re.match(self._VALID_URL, url).groups()
+
+ article = self._download_json(
+ 'https://api.svt.se/nss-api/page/' + path, display_id,
+ query={'q': 'articles'})['articles']['content'][0]
+
+ entries = []
+
+ def _process_content(content):
+ if content.get('_type') in ('VIDEOCLIP', 'VIDEOEPISODE'):
+ video_id = compat_str(content['image']['svtId'])
+ entries.append(self.url_result(
+ 'svt:' + video_id, SVTPlayIE.ie_key(), video_id))
+
+ for media in article.get('media', []):
+ _process_content(media)
+
+ for obj in article.get('structuredBody', []):
+ _process_content(obj.get('content') or {})
+
+ return self.playlist_result(
+ entries, str_or_none(article.get('id')),
+ strip_or_none(article.get('title')))
diff --git a/youtube_dl/extractor/swrmediathek.py b/youtube_dlc/extractor/swrmediathek.py
index 0f615979e..0f615979e 100644
--- a/youtube_dl/extractor/swrmediathek.py
+++ b/youtube_dlc/extractor/swrmediathek.py
diff --git a/youtube_dl/extractor/syfy.py b/youtube_dlc/extractor/syfy.py
index def7e5a2c..def7e5a2c 100644
--- a/youtube_dl/extractor/syfy.py
+++ b/youtube_dlc/extractor/syfy.py
diff --git a/youtube_dl/extractor/sztvhu.py b/youtube_dlc/extractor/sztvhu.py
index cfad33146..cfad33146 100644
--- a/youtube_dl/extractor/sztvhu.py
+++ b/youtube_dlc/extractor/sztvhu.py
diff --git a/youtube_dl/extractor/tagesschau.py b/youtube_dlc/extractor/tagesschau.py
index c351b7545..c351b7545 100644
--- a/youtube_dl/extractor/tagesschau.py
+++ b/youtube_dlc/extractor/tagesschau.py
diff --git a/youtube_dl/extractor/tass.py b/youtube_dlc/extractor/tass.py
index 6d336da78..6d336da78 100644
--- a/youtube_dl/extractor/tass.py
+++ b/youtube_dlc/extractor/tass.py
diff --git a/youtube_dl/extractor/tastytrade.py b/youtube_dlc/extractor/tastytrade.py
index 7fe96bd5f..7fe96bd5f 100644
--- a/youtube_dl/extractor/tastytrade.py
+++ b/youtube_dlc/extractor/tastytrade.py
diff --git a/youtube_dl/extractor/tbs.py b/youtube_dlc/extractor/tbs.py
index e8a7c65e0..e8a7c65e0 100644
--- a/youtube_dl/extractor/tbs.py
+++ b/youtube_dlc/extractor/tbs.py
diff --git a/youtube_dl/extractor/tdslifeway.py b/youtube_dlc/extractor/tdslifeway.py
index 101c6ee31..101c6ee31 100644
--- a/youtube_dl/extractor/tdslifeway.py
+++ b/youtube_dlc/extractor/tdslifeway.py
diff --git a/youtube_dlc/extractor/teachable.py b/youtube_dlc/extractor/teachable.py
new file mode 100644
index 000000000..a75369dbe
--- /dev/null
+++ b/youtube_dlc/extractor/teachable.py
@@ -0,0 +1,298 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .wistia import WistiaIE
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ get_element_by_class,
+ strip_or_none,
+ urlencode_postdata,
+ urljoin,
+)
+
+
+class TeachableBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'teachable'
+ _URL_PREFIX = 'teachable:'
+
+ _SITES = {
+ # Only notable ones here
+ 'v1.upskillcourses.com': 'upskill',
+ 'gns3.teachable.com': 'gns3',
+ 'academyhacker.com': 'academyhacker',
+ 'stackskills.com': 'stackskills',
+ 'market.saleshacker.com': 'saleshacker',
+ 'learnability.org': 'learnability',
+ 'edurila.com': 'edurila',
+ 'courses.workitdaily.com': 'workitdaily',
+ }
+
+ _VALID_URL_SUB_TUPLE = (_URL_PREFIX, '|'.join(re.escape(site) for site in _SITES.keys()))
+
+ def _real_initialize(self):
+ self._logged_in = False
+
+ def _login(self, site):
+ if self._logged_in:
+ return
+
+ username, password = self._get_login_info(
+ netrc_machine=self._SITES.get(site, site))
+ if username is None:
+ return
+
+ login_page, urlh = self._download_webpage_handle(
+ 'https://%s/sign_in' % site, None,
+ 'Downloading %s login page' % site)
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'class=["\']user-signout',
+ r'<a[^>]+\bhref=["\']/sign_out',
+ r'Log\s+[Oo]ut\s*<'))
+
+ if is_logged(login_page):
+ self._logged_in = True
+ return
+
+ login_url = urlh.geturl()
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'user[email]': username,
+ 'user[password]': password,
+ })
+
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>(?:(?!\1).)+)\1', login_page,
+ 'post url', default=login_url, group='url')
+
+ if not post_url.startswith('http'):
+ post_url = urljoin(login_url, post_url)
+
+ response = self._download_webpage(
+ post_url, None, 'Logging in to %s' % site,
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': login_url,
+ })
+
+ if '>I accept the new Privacy Policy<' in response:
+ raise ExtractorError(
+ 'Unable to login: %s asks you to accept new Privacy Policy. '
+ 'Go to https://%s/ and accept.' % (site, site), expected=True)
+
+ # Successful login
+ if is_logged(response):
+ self._logged_in = True
+ return
+
+ message = get_element_by_class('alert', response)
+ if message is not None:
+ raise ExtractorError(
+ 'Unable to login: %s' % clean_html(message), expected=True)
+
+ raise ExtractorError('Unable to log in')
+
+
+class TeachableIE(TeachableBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ %shttps?://(?P<site_t>[^/]+)|
+ https?://(?:www\.)?(?P<site>%s)
+ )
+ /courses/[^/]+/lectures/(?P<id>\d+)
+ ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+
+ _TESTS = [{
+ 'url': 'https://gns3.teachable.com/courses/gns3-certified-associate/lectures/6842364',
+ 'info_dict': {
+ 'id': 'untlgzk1v7',
+ 'ext': 'bin',
+ 'title': 'Overview',
+ 'description': 'md5:071463ff08b86c208811130ea1c2464c',
+ 'duration': 736.4,
+ 'timestamp': 1542315762,
+ 'upload_date': '20181115',
+ 'chapter': 'Welcome',
+ 'chapter_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/119763/lectures/1747100',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gns3.teachable.com/courses/423415/lectures/6885939',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://v1.upskillcourses.com/courses/essential-web-developer-course/lectures/1747100',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _is_teachable(webpage):
+ return 'teachableTracker.linker:autoLink' in webpage and re.search(
+ r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
+ webpage)
+
+ @staticmethod
+ def _extract_url(webpage, source_url):
+ if not TeachableIE._is_teachable(webpage):
+ return
+ if re.match(r'https?://[^/]+/(?:courses|p)', source_url):
+ return '%s%s' % (TeachableBaseIE._URL_PREFIX, source_url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site = mobj.group('site') or mobj.group('site_t')
+ video_id = mobj.group('id')
+
+ self._login(site)
+
+ prefixed = url.startswith(self._URL_PREFIX)
+ if prefixed:
+ url = url[len(self._URL_PREFIX):]
+
+ webpage = self._download_webpage(url, video_id)
+
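+ # Lectures embed their video through Wistia; when no Wistia URL is found,
+ # distinguish locked lectures from pages that genuinely lack a video.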
+ wistia_urls = WistiaIE._extract_urls(webpage)
+ if not wistia_urls:
+ if any(re.search(p, webpage) for p in (
+ r'class=["\']lecture-contents-locked',
+ r'>\s*Lecture contents locked',
+ r'id=["\']lecture-locked',
+ # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313
+ r'class=["\'](?:inner-)?lesson-locked',
+ r'>LESSON LOCKED<')):
+ self.raise_login_required('Lecture contents locked')
+ raise ExtractorError('Unable to find video URL')
+
+ title = self._og_search_title(webpage, default=None)
+
+ chapter = None
+ chapter_number = None
+ section_item = self._search_regex(
+ r'(?s)(?P<li><li[^>]+\bdata-lecture-id=["\']%s[^>]+>.+?</li>)' % video_id,
+ webpage, 'section item', default=None, group='li')
+ if section_item:
+ chapter_number = int_or_none(self._search_regex(
+ r'data-ss-position=["\'](\d+)', section_item, 'section id',
+ default=None))
+ if chapter_number is not None:
+ sections = []
+ for s in re.findall(
+ r'(?s)<div[^>]+\bclass=["\']section-title[^>]+>(.+?)</div>', webpage):
+ section = strip_or_none(clean_html(s))
+ if not section:
+ sections = []
+ break
+ sections.append(section)
+ if chapter_number <= len(sections):
+ chapter = sections[chapter_number - 1]
+
+ entries = [{
+ '_type': 'url_transparent',
+ 'url': wistia_url,
+ 'ie_key': WistiaIE.ie_key(),
+ 'title': title,
+ 'chapter': chapter,
+ 'chapter_number': chapter_number,
+ } for wistia_url in wistia_urls]
+
+ return self.playlist_result(entries, video_id, title)
+
+
+class TeachableCourseIE(TeachableBaseIE):
+ _VALID_URL = r'''(?x)
+ (?:
+ %shttps?://(?P<site_t>[^/]+)|
+ https?://(?:www\.)?(?P<site>%s)
+ )
+ /(?:courses|p)/(?:enrolled/)?(?P<id>[^/?#&]+)
+ ''' % TeachableBaseIE._VALID_URL_SUB_TUPLE
+ _TESTS = [{
+ 'url': 'http://v1.upskillcourses.com/courses/essential-web-developer-course/',
+ 'info_dict': {
+ 'id': 'essential-web-developer-course',
+ 'title': 'The Essential Web Developer Course (Free)',
+ },
+ 'playlist_count': 192,
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/119763/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://v1.upskillcourses.com/courses/enrolled/119763',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gns3.teachable.com/courses/enrolled/423415',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://learn.vrdev.school/p/gear-vr-developer-mini',
+ 'only_matching': True,
+ }, {
+ 'url': 'teachable:https://filmsimplified.com/p/davinci-resolve-15-crash-course',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TeachableIE.suitable(url) else super(
+ TeachableCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ site = mobj.group('site') or mobj.group('site_t')
+ course_id = mobj.group('id')
+
+ self._login(site)
+
+ prefixed = url.startswith(self._URL_PREFIX)
+ if prefixed:
+ prefix = self._URL_PREFIX
+ url = url[len(prefix):]
+
+ webpage = self._download_webpage(url, course_id)
+
+ url_base = 'https://%s/' % site
+
+ entries = []
+
+ for mobj in re.finditer(
+ r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
+ webpage):
+ li = mobj.group('li')
+ if 'fa-youtube-play' not in li:
+ continue
+ lecture_url = self._search_regex(
+ r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li,
+ 'lecture url', default=None, group='url')
+ if not lecture_url:
+ continue
+ lecture_id = self._search_regex(
+ r'/lectures/(\d+)', lecture_url, 'lecture id', default=None)
+ title = self._html_search_regex(
+ r'<span[^>]+class=["\']lecture-name[^>]+>([^<]+)', li,
+ 'title', default=None)
+ entry_url = urljoin(url_base, lecture_url)
+ if prefixed:
+ entry_url = self._URL_PREFIX + entry_url
+ entries.append(
+ self.url_result(
+ entry_url,
+ ie=TeachableIE.ie_key(), video_id=lecture_id,
+ video_title=clean_html(title)))
+
+ course_title = self._html_search_regex(
+ (r'(?s)<img[^>]+class=["\']course-image[^>]+>\s*<h\d>(.+?)</h',
+ r'(?s)<h\d[^>]+class=["\']course-title[^>]+>(.+?)</h'),
+ webpage, 'course title', fatal=False)
+
+ return self.playlist_result(entries, course_id, course_title)
diff --git a/youtube_dl/extractor/teachertube.py b/youtube_dlc/extractor/teachertube.py
index 1272078c5..1272078c5 100644
--- a/youtube_dl/extractor/teachertube.py
+++ b/youtube_dlc/extractor/teachertube.py
diff --git a/youtube_dlc/extractor/teachingchannel.py b/youtube_dlc/extractor/teachingchannel.py
new file mode 100644
index 000000000..624cdb3ad
--- /dev/null
+++ b/youtube_dlc/extractor/teachingchannel.py
@@ -0,0 +1,35 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TeachingChannelIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?teachingchannel\.org/videos?/(?P<id>[^/?&#]+)'
+
+ _TEST = {
+ 'url': 'https://www.teachingchannel.org/videos/teacher-teaming-evolution',
+ 'info_dict': {
+ 'id': '3swwlzkT',
+ 'ext': 'mp4',
+ 'title': 'A History of Teaming',
+ 'description': 'md5:2a9033db8da81f2edffa4c99888140b3',
+ 'duration': 422,
+ 'upload_date': '20170316',
+ 'timestamp': 1489691297,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['JWPlatform'],
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
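+ # The 8-character JW Platform media id appears either in a data-mid
+ # attribute or in the player element's id.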
+ mid = self._search_regex(
+ r'(?:data-mid=["\']|id=["\']jw-video-player-)([a-zA-Z0-9]{8})',
+ webpage, 'media id')
+
+ return self.url_result('jwplatform:' + mid, 'JWPlatform', mid)
diff --git a/youtube_dlc/extractor/teamcoco.py b/youtube_dlc/extractor/teamcoco.py
new file mode 100644
index 000000000..5793b711f
--- /dev/null
+++ b/youtube_dlc/extractor/teamcoco.py
@@ -0,0 +1,209 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .turner import TurnerBaseIE
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_duration,
+ parse_iso8601,
+ qualities,
+)
+
+
+class TeamcocoIE(TurnerBaseIE):
+ _VALID_URL = r'https?://(?:\w+\.)?teamcoco\.com/(?P<id>([^/]+/)*[^/?#]+)'
+ _TESTS = [
+ {
+ 'url': 'http://teamcoco.com/video/mary-kay-remote',
+ 'md5': '55d532f81992f5c92046ad02fec34d7d',
+ 'info_dict': {
+ 'id': '80187',
+ 'ext': 'mp4',
+ 'title': 'Conan Becomes A Mary Kay Beauty Consultant',
+ 'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.',
+ 'duration': 495.0,
+ 'upload_date': '20140402',
+ 'timestamp': 1396407600,
+ }
+ }, {
+ 'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush',
+ 'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',
+ 'info_dict': {
+ 'id': '19705',
+ 'ext': 'mp4',
+ 'description': 'Louis C.K. got starstruck by George W. Bush, so what? Part one.',
+ 'title': 'Louis C.K. Interview Pt. 1 11/3/11',
+ 'duration': 288,
+ 'upload_date': '20111104',
+ 'timestamp': 1320405840,
+ }
+ }, {
+ 'url': 'http://teamcoco.com/video/timothy-olyphant-drinking-whiskey',
+ 'info_dict': {
+ 'id': '88748',
+ 'ext': 'mp4',
+ 'title': 'Timothy Olyphant Raises A Toast To “Justified”',
+ 'description': 'md5:15501f23f020e793aeca761205e42c24',
+ 'upload_date': '20150415',
+ 'timestamp': 1429088400,
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ }
+ }, {
+ 'url': 'http://teamcoco.com/video/full-episode-mon-6-1-joel-mchale-jake-tapper-and-musical-guest-courtney-barnett?playlist=x;eyJ0eXBlIjoidGFnIiwiaWQiOjl9',
+ 'info_dict': {
+ 'id': '89341',
+ 'ext': 'mp4',
+ 'title': 'Full Episode - Mon. 6/1 - Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+ 'description': 'Guests: Joel McHale, Jake Tapper, And Musical Guest Courtney Barnett',
+ },
+ 'params': {
+ 'skip_download': True, # m3u8 downloads
+ },
+ 'skip': 'This video is no longer available.',
+ }, {
+ 'url': 'http://teamcoco.com/video/the-conan-audiencey-awards-for-04/25/18',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/italy/conan-jordan-schlansky-hit-the-streets-of-florence',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/haiti/conan-s-haitian-history-lesson',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://teamcoco.com/israel/conan-hits-the-streets-beaches-of-tel-aviv',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://conan25.teamcoco.com/video/ice-cube-kevin-hart-conan-share-lyft',
+ 'only_matching': True,
+ }
+ ]
+ _RECORD_TEMPL = '''id
+ title
+ teaser
+ publishOn
+ thumb {
+ preview
+ }
+ tags {
+ name
+ }
+ duration
+ turnerMediaId
+ turnerMediaAuthToken'''
+
+ def _graphql_call(self, query_template, object_type, object_id):
+ find_object = 'find' + object_type
+ return self._download_json(
+ 'https://teamcoco.com/graphql', object_id, data=json.dumps({
+ 'query': query_template % (find_object, object_id)
+ }).encode(), headers={
+ 'Content-Type': 'application/json',
+ })['data'][find_object]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ response = self._graphql_call('''{
+ %%s(slug: "%%s") {
+ ... on RecordSlug {
+ record {
+ %s
+ }
+ }
+ ... on PageSlug {
+ child {
+ id
+ }
+ }
+ ... on NotFoundSlug {
+ status
+ }
+ }
+}''' % self._RECORD_TEMPL, 'Slug', display_id)
+ if response.get('status'):
+ raise ExtractorError('This video is no longer available.', expected=True)
+
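+ # A PageSlug response only carries a child id; resolve it to the actual
+ # video record with a second GraphQL call.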
+ child = response.get('child')
+ if child:
+ record = self._graphql_call('''{
+ %%s(id: "%%s") {
+ ... on Video {
+ %s
+ }
+ }
+}''' % self._RECORD_TEMPL, 'Record', child['id'])
+ else:
+ record = response['record']
+ video_id = record['id']
+
+ info = {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': record['title'],
+ 'thumbnail': record.get('thumb', {}).get('preview'),
+ 'description': record.get('teaser'),
+ 'duration': parse_duration(record.get('duration')),
+ 'timestamp': parse_iso8601(record.get('publishOn')),
+ }
+
+ media_id = record.get('turnerMediaId')
+ if media_id:
+ self._initialize_geo_bypass({
+ 'countries': ['US'],
+ })
+ info.update(self._extract_ngtv_info(media_id, {
+ 'accessToken': record['turnerMediaAuthToken'],
+ 'accessTokenType': 'jws',
+ }))
+ else:
+ video_sources = self._download_json(
+ 'https://teamcoco.com/_truman/d/' + video_id,
+ video_id)['meta']['src']
+ if isinstance(video_sources, dict):
+ video_sources = video_sources.values()
+
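+ # Sources are either HLS manifests (relative paths get the Turner CDN
+ # host prepended) or progressive MP4s with the bitrate in the file name.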
+ formats = []
+ get_quality = qualities(['low', 'sd', 'hd', 'uhd'])
+ for src in video_sources:
+ if not isinstance(src, dict):
+ continue
+ src_url = src.get('src')
+ if not src_url:
+ continue
+ format_id = src.get('label')
+ ext = determine_ext(src_url, mimetype2ext(src.get('type')))
+ if format_id == 'hls' or ext == 'm3u8':
+ # compat_urllib_parse.urljoin does not work here
+ if src_url.startswith('/'):
+ src_url = 'http://ht.cdn.turner.com/tbs/big/teamcoco' + src_url
+ formats.extend(self._extract_m3u8_formats(
+ src_url, video_id, 'mp4', m3u8_id=format_id, fatal=False))
+ else:
+ if src_url.startswith('/mp4:protected/'):
+ # TODO Correct extraction for these files
+ continue
+ tbr = int_or_none(self._search_regex(
+ r'(\d+)k\.mp4', src_url, 'tbr', default=None))
+
+ formats.append({
+ 'url': src_url,
+ 'ext': ext,
+ 'tbr': tbr,
+ 'format_id': format_id,
+ 'quality': get_quality(format_id),
+ })
+ self._sort_formats(formats)
+ info['formats'] = formats
+
+ return info
diff --git a/youtube_dl/extractor/teamtreehouse.py b/youtube_dlc/extractor/teamtreehouse.py
index d347e97ef..d347e97ef 100644
--- a/youtube_dl/extractor/teamtreehouse.py
+++ b/youtube_dlc/extractor/teamtreehouse.py
diff --git a/youtube_dl/extractor/techtalks.py b/youtube_dlc/extractor/techtalks.py
index a5b62c717..a5b62c717 100644
--- a/youtube_dl/extractor/techtalks.py
+++ b/youtube_dlc/extractor/techtalks.py
diff --git a/youtube_dl/extractor/ted.py b/youtube_dlc/extractor/ted.py
index 63e2455b2..63e2455b2 100644
--- a/youtube_dl/extractor/ted.py
+++ b/youtube_dlc/extractor/ted.py
diff --git a/youtube_dl/extractor/tele13.py b/youtube_dlc/extractor/tele13.py
index a29a64b6d..a29a64b6d 100644
--- a/youtube_dl/extractor/tele13.py
+++ b/youtube_dlc/extractor/tele13.py
diff --git a/youtube_dlc/extractor/tele5.py b/youtube_dlc/extractor/tele5.py
new file mode 100644
index 000000000..3e1a7a9e6
--- /dev/null
+++ b/youtube_dlc/extractor/tele5.py
@@ -0,0 +1,110 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from .jwplatform import JWPlatformIE
+from .nexx import NexxIE
+from ..compat import compat_urlparse
+from ..utils import (
+ NO_DEFAULT,
+ smuggle_url,
+)
+
+
+class Tele5IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tele5\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _GEO_COUNTRIES = ['DE']
+ _TESTS = [{
+ 'url': 'https://www.tele5.de/mediathek/filme-online/videos?vid=1549416',
+ 'info_dict': {
+ 'id': '1549416',
+ 'ext': 'mp4',
+ 'upload_date': '20180814',
+ 'timestamp': 1534290623,
+ 'title': 'Pandorum',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # jwplatform, nexx unavailable
+ 'url': 'https://www.tele5.de/filme/ghoul-das-geheimnis-des-friedhofmonsters/',
+ 'info_dict': {
+ 'id': 'WJuiOlUp',
+ 'ext': 'mp4',
+ 'upload_date': '20200603',
+ 'timestamp': 1591214400,
+ 'title': 'Ghoul - Das Geheimnis des Friedhofmonsters',
+ 'description': 'md5:42002af1d887ff3d5b2b3ca1f8137d97',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [JWPlatformIE.ie_key()],
+ }, {
+ 'url': 'https://www.tele5.de/kalkofes-mattscheibe/video-clips/politik-und-gesellschaft?ve_id=1551191',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/video-clip/?ve_id=1609440',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/filme/schlefaz-dragon-crusaders/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/filme/making-of/avengers-endgame/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/star-trek/raumschiff-voyager/ganze-folge/das-vinculum/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tele5.de/anders-ist-sevda/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = (qs.get('vid') or qs.get('ve_id') or [None])[0]
+
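+ # The vid/ve_id query value is either a numeric nexx id or an
+ # 8-character JW Platform media id; tell them apart by shape.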
+ NEXX_ID_RE = r'\d{6,}'
+ JWPLATFORM_ID_RE = r'[a-zA-Z0-9]{8}'
+
+ def nexx_result(nexx_id):
+ return self.url_result(
+ 'https://api.nexx.cloud/v3/759/videos/byid/%s' % nexx_id,
+ ie=NexxIE.ie_key(), video_id=nexx_id)
+
+ nexx_id = jwplatform_id = None
+
+ if video_id:
+ if re.match(NEXX_ID_RE, video_id):
+ return nexx_result(video_id)
+ elif re.match(JWPLATFORM_ID_RE, video_id):
+ jwplatform_id = video_id
+
+ if not nexx_id:
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ def extract_id(pattern, name, default=NO_DEFAULT):
+ return self._html_search_regex(
+ (r'id\s*=\s*["\']video-player["\'][^>]+data-id\s*=\s*["\'](%s)' % pattern,
+ r'\s+id\s*=\s*["\']player_(%s)' % pattern,
+ r'\bdata-id\s*=\s*["\'](%s)' % pattern), webpage, name,
+ default=default)
+
+ nexx_id = extract_id(NEXX_ID_RE, 'nexx id', default=None)
+ if nexx_id:
+ return nexx_result(nexx_id)
+
+ if not jwplatform_id:
+ jwplatform_id = extract_id(JWPLATFORM_ID_RE, 'jwplatform id')
+
+ return self.url_result(
+ smuggle_url(
+ 'jwplatform:%s' % jwplatform_id,
+ {'geo_countries': self._GEO_COUNTRIES}),
+ ie=JWPlatformIE.ie_key(), video_id=jwplatform_id)
diff --git a/youtube_dl/extractor/telebruxelles.py b/youtube_dlc/extractor/telebruxelles.py
index a0353fe3a..a0353fe3a 100644
--- a/youtube_dl/extractor/telebruxelles.py
+++ b/youtube_dlc/extractor/telebruxelles.py
diff --git a/youtube_dlc/extractor/telecinco.py b/youtube_dlc/extractor/telecinco.py
new file mode 100644
index 000000000..9ba3da341
--- /dev/null
+++ b/youtube_dlc/extractor/telecinco.py
@@ -0,0 +1,190 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from .ooyala import OoyalaIE
+from ..utils import (
+ clean_html,
+ determine_ext,
+ int_or_none,
+ str_or_none,
+ try_get,
+ urljoin,
+)
+
+
+class TelecincoIE(InfoExtractor):
+ IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
+ _VALID_URL = r'https?://(?:www\.)?(?:telecinco\.es|cuatro\.com|mediaset\.es)/(?:[^/]+/)+(?P<id>.+?)\.html'
+
+ _TESTS = [{
+ 'url': 'http://www.telecinco.es/robinfood/temporada-01/t01xp14/Bacalao-cocochas-pil-pil_0_1876350223.html',
+ 'info_dict': {
+ 'id': '1876350223',
+ 'title': 'Bacalao con kokotxas al pil-pil',
+ 'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
+ },
+ 'playlist': [{
+ 'md5': 'adb28c37238b675dad0f042292f209a7',
+ 'info_dict': {
+ 'id': 'JEA5ijCnF6p5W08A1rNKn7',
+ 'ext': 'mp4',
+ 'title': 'Con Martín Berasategui, hacer un bacalao al pil-pil es fácil y divertido',
+ 'duration': 662,
+ },
+ }]
+ }, {
+ 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
+ 'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
+ 'info_dict': {
+ 'id': 'jn24Od1zGLG4XUZcnUnZB6',
+ 'ext': 'mp4',
+ 'title': '¿Quién es este ex futbolista con el que hablan Leo Messi y Luis Suárez?',
+ 'description': 'md5:a62ecb5f1934fc787107d7b9a2262805',
+ 'duration': 79,
+ },
+ }, {
+ 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
+ 'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
+ 'info_dict': {
+ 'id': 'aywerkD2Sv1vGNqq9b85Q2',
+ 'ext': 'mp4',
+ 'title': '#DOYLACARA. Con la trata no hay trato',
+ 'description': 'md5:2771356ff7bfad9179c5f5cd954f1477',
+ 'duration': 50,
+ },
+ }, {
+ # video in opening's content
+ 'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html',
+ 'info_dict': {
+ 'id': '2907195140',
+ 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+ 'description': 'md5:73f340a7320143d37ab895375b2bf13a',
+ },
+ 'playlist': [{
+ 'md5': 'adb28c37238b675dad0f042292f209a7',
+ 'info_dict': {
+ 'id': 'TpI2EttSDAReWpJ1o0NVh2',
+ 'ext': 'mp4',
+ 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"',
+ 'duration': 1015,
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.telecinco.es/espanasinirmaslejos/Espana-gran-destino-turistico_2_1240605043.html',
+ 'only_matching': True,
+ }, {
+ # ooyala video
+ 'url': 'http://www.cuatro.com/chesterinlove/a-carta/chester-chester_in_love-chester_edu_2_2331030022.html',
+ 'only_matching': True,
+ }]
+
+ def _parse_content(self, content, url):
+ video_id = content['dataMediaId']
+ if content.get('dataCmsId') == 'ooyala':
+ return self.url_result(
+ 'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
+ config_url = urljoin(url, content['dataConfig'])
+ config = self._download_json(
+ config_url, video_id, 'Downloading config JSON')
+ title = config['info']['title']
+
+ def mmc_url(mmc_type):
+ return re.sub(
+ r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
+ config['services']['mmc'])
+
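+ # Each mmc manifest lists token gateways (gat) per location; POSTing the
+ # gcp/ogn token data there yields the actual stream URL.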
+ duration = None
+ formats = []
+ for mmc_type in ('flash', 'html5'):
+ mmc = self._download_json(
+ mmc_url(mmc_type), video_id,
+ 'Downloading %s mmc JSON' % mmc_type, fatal=False)
+ if not mmc:
+ continue
+ if not duration:
+ duration = int_or_none(mmc.get('duration'))
+ for location in mmc['locations']:
+ gat = self._proto_relative_url(location.get('gat'), 'http:')
+ gcp = location.get('gcp')
+ ogn = location.get('ogn')
+ if None in (gat, gcp, ogn):
+ continue
+ token_data = {
+ 'gcp': gcp,
+ 'ogn': ogn,
+ 'sta': 0,
+ }
+ media = self._download_json(
+ gat, video_id, data=json.dumps(token_data).encode('utf-8'),
+ headers={
+ 'Content-Type': 'application/json;charset=utf-8',
+ 'Referer': url,
+ }, fatal=False) or {}
+ stream = media.get('stream') or media.get('file')
+ if not stream:
+ continue
+ ext = determine_ext(stream)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
+ video_id, f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ stream, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ 'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
+ 'duration': duration,
+ }
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+ article = self._parse_json(self._search_regex(
+ r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})',
+ webpage, 'article'), display_id)['article']
+ title = article.get('title')
+ description = clean_html(article.get('leadParagraph')) or ''
+ if article.get('editorialType') != 'VID':
+ entries = []
+ body = [article.get('opening')]
+ body.extend(try_get(article, lambda x: x['body'], list) or [])
+ for p in body:
+ if not isinstance(p, dict):
+ continue
+ content = p.get('content')
+ if not content:
+ continue
+ type_ = p.get('type')
+ if type_ == 'paragraph':
+ content_str = str_or_none(content)
+ if content_str:
+ description += content_str
+ continue
+ if type_ == 'video' and isinstance(content, dict):
+ entries.append(self._parse_content(content, url))
+ return self.playlist_result(
+ entries, str_or_none(article.get('id')), title, description)
+ content = article['opening']['content']
+ info = self._parse_content(content, url)
+ info.update({
+ 'description': description,
+ })
+ return info
diff --git a/youtube_dlc/extractor/telegraaf.py b/youtube_dlc/extractor/telegraaf.py
new file mode 100644
index 000000000..2dc020537
--- /dev/null
+++ b/youtube_dlc/extractor/telegraaf.py
@@ -0,0 +1,91 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_iso8601,
+ try_get,
+)
+
+
+class TelegraafIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
+ 'info_dict': {
+ 'id': 'gaMItuoSeUg2',
+ 'ext': 'mp4',
+ 'title': 'Historisch scheepswrak slaat na 100 jaar los',
+ 'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 55,
+ 'timestamp': 1572805527,
+ 'upload_date': '20191103',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
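+ # Resolve the article uid to its first attached videoId via GraphQL,
+ # then fetch the playlist item from the TMG video CDN.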
+ video_id = self._download_json(
+ 'https://www.telegraaf.nl/graphql', article_id, query={
+ 'query': '''{
+ article(uid: %s) {
+ videos {
+ videoId
+ }
+ }
+}''' % article_id,
+ })['data']['article']['videos'][0]['videoId']
+
+ item = self._download_json(
+ 'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id,
+ video_id)['items'][0]
+ title = item['title']
+
+ formats = []
+ locations = item.get('locations') or {}
+ for location in locations.get('adaptive', []):
+ manifest_url = location.get('src')
+ if not manifest_url:
+ continue
+ ext = determine_ext(manifest_url)
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ manifest_url, video_id, mpd_id='dash', fatal=False))
+ else:
+ self.report_warning('Unknown adaptive format %s' % ext)
+ for location in locations.get('progressive', []):
+ src = try_get(location, lambda x: x['sources'][0]['src'])
+ if not src:
+ continue
+ label = location.get('label')
+ formats.append({
+ 'url': src,
+ 'width': int_or_none(location.get('width')),
+ 'height': int_or_none(location.get('height')),
+ 'format_id': 'http' + ('-%s' % label if label else ''),
+ })
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': item.get('description'),
+ 'formats': formats,
+ 'duration': int_or_none(item.get('duration')),
+ 'thumbnail': item.get('poster'),
+ 'timestamp': parse_iso8601(item.get('datecreated'), ' '),
+ }
diff --git a/youtube_dl/extractor/telemb.py b/youtube_dlc/extractor/telemb.py
index 9bcac4ec0..9bcac4ec0 100644
--- a/youtube_dl/extractor/telemb.py
+++ b/youtube_dlc/extractor/telemb.py
diff --git a/youtube_dlc/extractor/telequebec.py b/youtube_dlc/extractor/telequebec.py
new file mode 100644
index 000000000..c82c94b3a
--- /dev/null
+++ b/youtube_dlc/extractor/telequebec.py
@@ -0,0 +1,207 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ smuggle_url,
+ try_get,
+ unified_timestamp,
+)
+
+
+class TeleQuebecBaseIE(InfoExtractor):
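+ # Playback goes through Limelight; geo_countries is smuggled into the URL
+ # so the Limelight extractor can apply the CA geo bypass.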
+ @staticmethod
+ def _limelight_result(media_id):
+ return {
+ '_type': 'url_transparent',
+ 'url': smuggle_url(
+ 'limelight:media:' + media_id, {'geo_countries': ['CA']}),
+ 'ie_key': 'LimelightMedia',
+ }
+
+
+class TeleQuebecIE(TeleQuebecBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ zonevideo\.telequebec\.tv/media|
+ coucou\.telequebec\.tv/videos
+ )/(?P<id>\d+)
+ '''
+ _TESTS = [{
+ # available until 2023-01-01
+ 'url': 'http://zonevideo.telequebec.tv/media/37578/un-petit-choc-et-puis-repart/un-chef-a-la-cabane',
+ 'info_dict': {
+ 'id': '577116881b4b439084e6b1cf4ef8b1b3',
+ 'ext': 'mp4',
+ 'title': 'Un petit choc et puis repart!',
+ 'description': 'md5:b04a7e6b3f74e32d7b294cffe8658374',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # no description
+ 'url': 'http://zonevideo.telequebec.tv/media/30261',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://coucou.telequebec.tv/videos/41788/idee-de-genie/l-heure-du-bain',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ media_id = self._match_id(url)
+
+ media_data = self._download_json(
+ 'https://mnmedias.api.telequebec.tv/api/v2/media/' + media_id,
+ media_id)['media']
+
+ info = self._limelight_result(media_data['streamInfo']['sourceId'])
+ info.update({
+ 'title': media_data.get('title'),
+ 'description': try_get(
+ media_data, lambda x: x['descriptions'][0]['text'], compat_str),
+ 'duration': int_or_none(
+ media_data.get('durationInMilliseconds'), 1000),
+ })
+ return info
+
+
+class TeleQuebecSquatIE(InfoExtractor):
+ _VALID_URL = r'https://squat\.telequebec\.tv/videos/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://squat.telequebec.tv/videos/9314',
+ 'info_dict': {
+ 'id': 'd59ae78112d542e793d83cc9d3a5b530',
+ 'ext': 'mp4',
+ 'title': 'Poupeflekta',
+ 'description': 'md5:2f0718f8d2f8fece1646ee25fb7bce75',
+ 'duration': 1351,
+ 'timestamp': 1569057600,
+ 'upload_date': '20190921',
+ 'series': 'Miraculous : Les Aventures de Ladybug et Chat Noir',
+ 'season': 'Saison 3',
+ 'season_number': 3,
+ 'episode_number': 57,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://squat.api.telequebec.tv/v1/videos/%s' % video_id,
+ video_id)
+
+ media_id = video['sourceId']
+
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://zonevideo.telequebec.tv/media/%s' % media_id,
+ 'ie_key': TeleQuebecIE.ie_key(),
+ 'id': media_id,
+ 'title': video.get('titre'),
+ 'description': video.get('description'),
+ 'timestamp': unified_timestamp(video.get('datePublication')),
+ 'series': video.get('container'),
+ 'season': video.get('saison'),
+ 'season_number': int_or_none(video.get('noSaison')),
+ 'episode_number': int_or_none(video.get('episode')),
+ }
+
+
+class TeleQuebecEmissionIE(TeleQuebecBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ [^/]+\.telequebec\.tv/emissions/|
+ (?:www\.)?telequebec\.tv/
+ )
+ (?P<id>[^?#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://lindicemcsween.telequebec.tv/emissions/100430013/des-soins-esthetiques-a-377-d-interets-annuels-ca-vous-tente',
+ 'info_dict': {
+ 'id': '66648a6aef914fe3badda25e81a4d50a',
+ 'ext': 'mp4',
+ 'title': "Des soins esthétiques à 377 % d'intérêts annuels, ça vous tente?",
+ 'description': 'md5:369e0d55d0083f1fc9b71ffb640ea014',
+ 'upload_date': '20171024',
+ 'timestamp': 1508862118,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://bancpublic.telequebec.tv/emissions/emission-49/31986/jeunes-meres-sous-pression',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.telequebec.tv/masha-et-michka/epi059masha-et-michka-3-053-078',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.telequebec.tv/documentaire/bebes-sur-mesure/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ media_id = self._search_regex(
+ r'mediaUID\s*:\s*["\'][Ll]imelight_(?P<id>[a-z0-9]{32})', webpage,
+ 'limelight id')
+
+ info = self._limelight_result(media_id)
+ info.update({
+ 'title': self._og_search_title(webpage, default=None),
+ 'description': self._og_search_description(webpage, default=None),
+ })
+ return info
+
+
+class TeleQuebecLiveIE(InfoExtractor):
+ _VALID_URL = r'https?://zonevideo\.telequebec\.tv/(?P<id>endirect)'
+ _TEST = {
+ 'url': 'http://zonevideo.telequebec.tv/endirect/',
+ 'info_dict': {
+ 'id': 'endirect',
+ 'ext': 'mp4',
+ 'title': 're:^Télé-Québec - En direct [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'is_live': True,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ m3u8_url = None
+ webpage = self._download_webpage(
+ 'https://player.telequebec.tv/Tq_VideoPlayer.js', video_id,
+ fatal=False)
+ if webpage:
+ m3u8_url = self._search_regex(
+ r'm3U8Url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'm3u8 url', default=None, group='url')
+ if not m3u8_url:
+ m3u8_url = 'https://teleqmmd.mmdlive.lldns.net/teleqmmd/f386e3b206814e1f8c8c1c71c0f8e748/manifest.m3u8'
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', m3u8_id='hls')
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title('Télé-Québec - En direct'),
+ 'is_live': True,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/teletask.py b/youtube_dlc/extractor/teletask.py
index b9e2ef8ca..b9e2ef8ca 100644
--- a/youtube_dl/extractor/teletask.py
+++ b/youtube_dlc/extractor/teletask.py
diff --git a/youtube_dl/extractor/telewebion.py b/youtube_dlc/extractor/telewebion.py
index 1207b1a1b..1207b1a1b 100644
--- a/youtube_dl/extractor/telewebion.py
+++ b/youtube_dlc/extractor/telewebion.py
diff --git a/youtube_dl/extractor/tennistv.py b/youtube_dlc/extractor/tennistv.py
index a586f30ad..a586f30ad 100644
--- a/youtube_dl/extractor/tennistv.py
+++ b/youtube_dlc/extractor/tennistv.py
diff --git a/youtube_dlc/extractor/tenplay.py b/youtube_dlc/extractor/tenplay.py
new file mode 100644
index 000000000..af325fea8
--- /dev/null
+++ b/youtube_dlc/extractor/tenplay.py
@@ -0,0 +1,60 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_age_limit,
+ parse_iso8601,
+ smuggle_url,
+)
+
+
+class TenPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
+ _TESTS = [{
+ 'url': 'https://10play.com.au/masterchef/episodes/season-1/masterchef-s1-ep-1/tpv190718kwzga',
+ 'info_dict': {
+ 'id': '6060533435001',
+ 'ext': 'mp4',
+ 'title': 'MasterChef - S1 Ep. 1',
+ 'description': 'md5:4fe7b78e28af8f2d900cd20d900ef95c',
+ 'age_limit': 10,
+ 'timestamp': 1240828200,
+ 'upload_date': '20090427',
+ 'uploader_id': '2199827728001',
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'https://players.brightcove.net/2199827728001/cN6vRtRQt_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ content_id = self._match_id(url)
+ data = self._download_json(
+ 'https://10play.com.au/api/video/' + content_id, content_id)
+ video = data.get('video') or {}
+ metadata = data.get('metaData') or {}
+ brightcove_id = video.get('videoId') or metadata['showContentVideoId']
+ brightcove_url = smuggle_url(
+ self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id,
+ {'geo_countries': ['AU']})
+
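+ # Hand off to the Brightcove extractor via url_transparent while keeping
+ # 10play's own metadata.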
+ return {
+ '_type': 'url_transparent',
+ 'url': brightcove_url,
+ 'id': content_id,
+ 'title': video.get('title') or metadata.get('pageContentName') or metadata.get('showContentName'),
+ 'description': video.get('description'),
+ 'age_limit': parse_age_limit(video.get('showRatingClassification') or metadata.get('showProgramClassification')),
+ 'series': metadata.get('showName'),
+ 'season': metadata.get('showContentSeason'),
+ 'timestamp': parse_iso8601(metadata.get('contentPublishDate') or metadata.get('pageContentPublishDate')),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/youtube_dl/extractor/testurl.py b/youtube_dlc/extractor/testurl.py
index 84a14a0bd..84a14a0bd 100644
--- a/youtube_dl/extractor/testurl.py
+++ b/youtube_dlc/extractor/testurl.py
diff --git a/youtube_dl/extractor/tf1.py b/youtube_dlc/extractor/tf1.py
index 55e2a0721..55e2a0721 100644
--- a/youtube_dl/extractor/tf1.py
+++ b/youtube_dlc/extractor/tf1.py
diff --git a/youtube_dlc/extractor/tfo.py b/youtube_dlc/extractor/tfo.py
new file mode 100644
index 000000000..0631cb7ab
--- /dev/null
+++ b/youtube_dlc/extractor/tfo.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+ HEADRequest,
+ ExtractorError,
+ int_or_none,
+ clean_html,
+)
+
+
+class TFOIE(InfoExtractor):
+ _GEO_COUNTRIES = ['CA']
+ _VALID_URL = r'https?://(?:www\.)?tfo\.org/(?:en|fr)/(?:[^/]+/){2}(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.tfo.org/en/universe/tfo-247/100463871/video-game-hackathon',
+ 'md5': 'cafbe4f47a8dae0ca0159937878100d6',
+ 'info_dict': {
+ 'id': '7da3d50e495c406b8fc0b997659cc075',
+ 'ext': 'mp4',
+ 'title': 'Video Game Hackathon',
+ 'description': 'md5:558afeba217c6c8d96c60e5421795c07',
+ }
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
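+ # A HEAD request to the homepage sets the tfo-session cookie, whose value
+ # is then passed back in the X-tfo-session header.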
+ self._request_webpage(HEADRequest('http://www.tfo.org/'), video_id)
+ infos = self._download_json(
+ 'http://www.tfo.org/api/web/video/get_infos', video_id, data=json.dumps({
+ 'product_id': video_id,
+ }).encode(), headers={
+ 'X-tfo-session': self._get_cookies('http://www.tfo.org/')['tfo-session'].value,
+ })
+ if infos.get('success') == 0:
+ if infos.get('code') == 'ErrGeoBlocked':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(infos['msg'])), expected=True)
+ video_data = infos['data']
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'url': 'limelight:media:' + video_data['llid'],
+ 'title': video_data['title'],
+ 'description': video_data.get('description'),
+ 'series': video_data.get('collection'),
+ 'season_number': int_or_none(video_data.get('season')),
+ 'episode_number': int_or_none(video_data.get('episode')),
+ 'duration': int_or_none(video_data.get('duration')),
+ 'ie_key': 'LimelightMedia',
+ }
diff --git a/youtube_dl/extractor/theintercept.py b/youtube_dlc/extractor/theintercept.py
index f23b58713..f23b58713 100644
--- a/youtube_dl/extractor/theintercept.py
+++ b/youtube_dlc/extractor/theintercept.py
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dlc/extractor/theplatform.py
index 07055513a..07055513a 100644
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dlc/extractor/theplatform.py
diff --git a/youtube_dl/extractor/thescene.py b/youtube_dlc/extractor/thescene.py
index cd642355c..cd642355c 100644
--- a/youtube_dl/extractor/thescene.py
+++ b/youtube_dlc/extractor/thescene.py
diff --git a/youtube_dl/extractor/thestar.py b/youtube_dlc/extractor/thestar.py
index c3f118894..c3f118894 100644
--- a/youtube_dl/extractor/thestar.py
+++ b/youtube_dlc/extractor/thestar.py
diff --git a/youtube_dlc/extractor/thesun.py b/youtube_dlc/extractor/thesun.py
new file mode 100644
index 000000000..15d4a6932
--- /dev/null
+++ b/youtube_dlc/extractor/thesun.py
@@ -0,0 +1,40 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import extract_attributes
+
+
+class TheSunIE(InfoExtractor):
+ _VALID_URL = r'https://(?:www\.)?thesun\.co\.uk/[^/]+/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.thesun.co.uk/tvandshowbiz/2261604/orlando-bloom-and-katy-perry-post-adorable-instagram-video-together-celebrating-thanksgiving-after-split-rumours/',
+ 'info_dict': {
+ 'id': '2261604',
+ 'title': 'md5:cba22f48bad9218b64d5bbe0e16afddf',
+ },
+ 'playlist_count': 2,
+ }
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ article_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, article_id)
+
+ entries = []
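+ # Each <video> tag carries a pending Brightcove video id; the account id
+ # falls back to a hard-coded default when absent.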
+ for video in re.findall(
+ r'<video[^>]+data-video-id-pending=[^>]+>',
+ webpage):
+ attrs = extract_attributes(video)
+ video_id = attrs['data-video-id-pending']
+ account_id = attrs.get('data-account', '5067014667001')
+ entries.append(self.url_result(
+ self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id),
+ 'BrightcoveNew', video_id))
+
+ return self.playlist_result(
+ entries, article_id, self._og_search_title(webpage, fatal=False))
diff --git a/youtube_dl/extractor/theweatherchannel.py b/youtube_dlc/extractor/theweatherchannel.py
index c34a49d03..c34a49d03 100644
--- a/youtube_dl/extractor/theweatherchannel.py
+++ b/youtube_dlc/extractor/theweatherchannel.py
diff --git a/youtube_dl/extractor/thisamericanlife.py b/youtube_dlc/extractor/thisamericanlife.py
index 91e45f2c3..91e45f2c3 100644
--- a/youtube_dl/extractor/thisamericanlife.py
+++ b/youtube_dlc/extractor/thisamericanlife.py
diff --git a/youtube_dl/extractor/thisav.py b/youtube_dlc/extractor/thisav.py
index dc3dd03c8..dc3dd03c8 100644
--- a/youtube_dl/extractor/thisav.py
+++ b/youtube_dlc/extractor/thisav.py
diff --git a/youtube_dlc/extractor/thisoldhouse.py b/youtube_dlc/extractor/thisoldhouse.py
new file mode 100644
index 000000000..a3d9b4017
--- /dev/null
+++ b/youtube_dlc/extractor/thisoldhouse.py
@@ -0,0 +1,49 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class ThisOldHouseIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/]+/)?\d+)/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ 'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
+ 'info_dict': {
+ 'id': '5dcdddf673c3f956ef5db202',
+ 'ext': 'mp4',
+ 'title': 'How to Build a Storage Bench',
+ 'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
+ 'timestamp': 1442548800,
+ 'upload_date': '20150918',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
+ 'only_matching': True,
+ }, {
+ # video embedded via a www.thisoldhouse.com iframe
+ 'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
+ 'only_matching': True,
+ }]
+ _ZYPE_TMPL = 'https://player.zype.com/embed/%s.html?api_key=hsOk_yMSPYNrT22e9pu8hihLXjaZf0JW5jsOWv4ZqyHJFvkJn6rtToHl09tbbsbe'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
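+ # The page embeds a Zype player in an iframe; the 24-hex-digit id in its
+ # src attribute is the Zype video id.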
+ video_id = self._search_regex(
+ r'<iframe[^>]+src=[\'"](?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})',
+ webpage, 'video id')
+ return self.url_result(self._ZYPE_TMPL % video_id, 'Zype', video_id)
diff --git a/youtube_dl/extractor/threeqsdn.py b/youtube_dlc/extractor/threeqsdn.py
index f26937da1..f26937da1 100644
--- a/youtube_dl/extractor/threeqsdn.py
+++ b/youtube_dlc/extractor/threeqsdn.py
diff --git a/youtube_dl/extractor/tiktok.py b/youtube_dlc/extractor/tiktok.py
index 66088b9ab..66088b9ab 100644
--- a/youtube_dl/extractor/tiktok.py
+++ b/youtube_dlc/extractor/tiktok.py
diff --git a/youtube_dl/extractor/tinypic.py b/youtube_dlc/extractor/tinypic.py
index bc2def508..bc2def508 100644
--- a/youtube_dl/extractor/tinypic.py
+++ b/youtube_dlc/extractor/tinypic.py
diff --git a/youtube_dl/extractor/tmz.py b/youtube_dlc/extractor/tmz.py
index 419f9d92e..419f9d92e 100644
--- a/youtube_dl/extractor/tmz.py
+++ b/youtube_dlc/extractor/tmz.py
diff --git a/youtube_dl/extractor/tnaflix.py b/youtube_dlc/extractor/tnaflix.py
index b3573c6e0..b3573c6e0 100644
--- a/youtube_dl/extractor/tnaflix.py
+++ b/youtube_dlc/extractor/tnaflix.py
diff --git a/youtube_dlc/extractor/toggle.py b/youtube_dlc/extractor/toggle.py
new file mode 100644
index 000000000..ca2e36efe
--- /dev/null
+++ b/youtube_dlc/extractor/toggle.py
@@ -0,0 +1,215 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ parse_iso8601,
+ sanitized_Request,
+)
+
+
+class ToggleIE(InfoExtractor):
+ IE_NAME = 'toggle'
+ _VALID_URL = r'https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)'
+ _TESTS = [{
+ 'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115',
+ 'info_dict': {
+ 'id': '343115',
+ 'ext': 'mp4',
+ 'title': 'Lion Moms Premiere',
+ 'description': 'md5:aea1149404bff4d7f7b6da11fafd8e6b',
+ 'upload_date': '20150910',
+ 'timestamp': 1441858274,
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ }
+ }, {
+ 'note': 'DRM-protected video',
+ 'url': 'http://www.mewatch.sg/en/movies/dug-s-special-mission/341413',
+ 'info_dict': {
+ 'id': '341413',
+ 'ext': 'wvm',
+ 'title': 'Dug\'s Special Mission',
+ 'description': 'md5:e86c6f4458214905c1772398fabc93e0',
+ 'upload_date': '20150827',
+ 'timestamp': 1440644006,
+ },
+ 'params': {
+ 'skip_download': 'DRM-protected wvm download',
+ }
+ }, {
+ # this also tests correct video id extraction
+ 'note': 'm3u8 links are geo-restricted, but Android/mp4 is okay',
+ 'url': 'http://www.mewatch.sg/en/series/28th-sea-games-5-show/28th-sea-games-5-show-ep11/332861',
+ 'info_dict': {
+ 'id': '332861',
+ 'ext': 'mp4',
+ 'title': '28th SEA Games (5 Show) - Episode 11',
+ 'description': 'md5:3cd4f5f56c7c3b1340c50a863f896faa',
+ 'upload_date': '20150605',
+ 'timestamp': 1433480166,
+ },
+ 'params': {
+ 'skip_download': 'm3u8 download',
+ },
+ 'skip': 'm3u8 links are geo-restricted'
+ }, {
+ 'url': 'http://video.toggle.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/clips/seraph-sun-aloysius-will-suddenly-sing-some-old-songs-in-high-pitch-on-set/343331',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/zh/series/zero-calling-s2-hd/ep13/336367',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/series/vetri-s2/webisodes/jeeva-is-an-orphan-vetri-s2-webisode-7/342302',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/movies/seven-days/321936',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.mewatch.sg/en/tv-show/news/may-2017-cna-singapore-tonight/fri-19-may-2017/512456',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.mewatch.sg/en/channels/eleven-plus/401585',
+ 'only_matching': True,
+ }]
+
+ _FORMAT_PREFERENCES = {
+ 'wvm-STBMain': -10,
+ 'wvm-iPadMain': -20,
+ 'wvm-iPhoneMain': -30,
+ 'wvm-Android': -40,
+ }
+ _API_USER = 'tvpapi_147'
+ _API_PASS = '11111'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ url, video_id, note='Downloading video page')
+
+ api_user = self._search_regex(
+ r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser',
+ default=self._API_USER, group='user')
+ api_pass = self._search_regex(
+ r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass',
+ default=self._API_PASS, group='pass')
+
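+ # Payload for the Tvinci (tvpapi) GetMediaInfo gateway, using credentials
+ # scraped from the page above or the hard-coded defaults.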
+ params = {
+ 'initObj': {
+ 'Locale': {
+ 'LocaleLanguage': '',
+ 'LocaleCountry': '',
+ 'LocaleDevice': '',
+ 'LocaleUserState': 0
+ },
+ 'Platform': 0,
+ 'SiteGuid': 0,
+ 'DomainID': '0',
+ 'UDID': '',
+ 'ApiUser': api_user,
+ 'ApiPass': api_pass
+ },
+ 'MediaID': video_id,
+ 'mediaType': 0,
+ }
+
+ req = sanitized_Request(
+ 'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo',
+ json.dumps(params).encode('utf-8'))
+ info = self._download_json(req, video_id, 'Downloading video info json')
+
+ title = info['MediaName']
+
+ formats = []
+ for video_file in info.get('Files', []):
+ video_url, vid_format = video_file.get('URL'), video_file.get('Format')
+ if not video_url or video_url == 'NA' or not vid_format:
+ continue
+ ext = determine_ext(video_url)
+ vid_format = vid_format.replace(' ', '')
+ # if geo-restricted, m3u8 is inaccessible, but mp4 is okay
+ if ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, ext='mp4', m3u8_id=vid_format,
+ note='Downloading %s m3u8 information' % vid_format,
+ errnote='Failed to download %s m3u8 information' % vid_format,
+ fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, mpd_id=vid_format,
+ note='Downloading %s MPD manifest' % vid_format,
+ errnote='Failed to download %s MPD manifest' % vid_format,
+ fatal=False))
+ elif ext == 'ism':
+ formats.extend(self._extract_ism_formats(
+ video_url, video_id, ism_id=vid_format,
+ note='Downloading %s ISM manifest' % vid_format,
+ errnote='Failed to download %s ISM manifest' % vid_format,
+ fatal=False))
+ elif ext in ('mp4', 'wvm'):
+ # wvm files are DRM-protected
+ formats.append({
+ 'ext': ext,
+ 'url': video_url,
+ 'format_id': vid_format,
+ 'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1,
+ 'format_note': 'DRM-protected video' if ext == 'wvm' else None
+ })
+ if not formats:
+ # Most likely because the video is geo-blocked
+ raise ExtractorError('No downloadable videos found', expected=True)
+ self._sort_formats(formats)
+
+ duration = int_or_none(info.get('Duration'))
+ description = info.get('Description')
+ created_at = parse_iso8601(info.get('CreationDate') or None)
+
+ average_rating = float_or_none(info.get('Rating'))
+ view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter'))
+ like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter'))
+
+ thumbnails = []
+ for picture in info.get('Pictures', []):
+ if not isinstance(picture, dict):
+ continue
+ pic_url = picture.get('URL')
+ if not pic_url:
+ continue
+ thumbnail = {
+ 'url': pic_url,
+ }
+ pic_size = picture.get('PicSize', '')
+ m = re.search(r'(?P<width>\d+)[xX](?P<height>\d+)', pic_size)
+ if m:
+ thumbnail.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ thumbnails.append(thumbnail)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': created_at,
+ 'average_rating': average_rating,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'thumbnails': thumbnails,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/tonline.py b/youtube_dlc/extractor/tonline.py
index cc11eae2a..cc11eae2a 100644
--- a/youtube_dl/extractor/tonline.py
+++ b/youtube_dlc/extractor/tonline.py
diff --git a/youtube_dl/extractor/toongoggles.py b/youtube_dlc/extractor/toongoggles.py
index b5ba1c01d..b5ba1c01d 100644
--- a/youtube_dl/extractor/toongoggles.py
+++ b/youtube_dlc/extractor/toongoggles.py
diff --git a/youtube_dl/extractor/toutv.py b/youtube_dlc/extractor/toutv.py
index 44b022fca..44b022fca 100644
--- a/youtube_dl/extractor/toutv.py
+++ b/youtube_dlc/extractor/toutv.py
diff --git a/youtube_dl/extractor/toypics.py b/youtube_dlc/extractor/toypics.py
index f705a06c9..f705a06c9 100644
--- a/youtube_dl/extractor/toypics.py
+++ b/youtube_dlc/extractor/toypics.py
diff --git a/youtube_dl/extractor/traileraddict.py b/youtube_dlc/extractor/traileraddict.py
index 747370d12..747370d12 100644
--- a/youtube_dl/extractor/traileraddict.py
+++ b/youtube_dlc/extractor/traileraddict.py
diff --git a/youtube_dl/extractor/trilulilu.py b/youtube_dlc/extractor/trilulilu.py
index a800449e9..a800449e9 100644
--- a/youtube_dl/extractor/trilulilu.py
+++ b/youtube_dlc/extractor/trilulilu.py
diff --git a/youtube_dlc/extractor/trunews.py b/youtube_dlc/extractor/trunews.py
new file mode 100644
index 000000000..cca5b5ceb
--- /dev/null
+++ b/youtube_dlc/extractor/trunews.py
@@ -0,0 +1,36 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class TruNewsIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?trunews\.com/stream/(?P<id>[^/?#&]+)'
+ _TEST = {
+ 'url': 'https://www.trunews.com/stream/will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'info_dict': {
+ 'id': '5c5a21e65d3c196e1c0020cc',
+ 'display_id': 'will-democrats-stage-a-circus-during-president-trump-s-state-of-the-union-speech',
+ 'ext': 'mp4',
+ 'title': "Will Democrats Stage a Circus During President Trump's State of the Union Speech?",
+ 'description': 'md5:c583b72147cc92cf21f56a31aff7a670',
+ 'duration': 3685,
+ 'timestamp': 1549411440,
+ 'upload_date': '20190206',
+ },
+ 'add_ie': ['Zype'],
+ }
+ _ZYPE_TEMPL = 'https://player.zype.com/embed/%s.js?api_key=X5XnahkjCwJrT_l5zUqypnaLEObotyvtUKJWWlONxDoHVjP8vqxlArLV8llxMbyt'
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
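+ # Look up the Zype video id by friendly title through the Zype API, then
+ # delegate to the Zype extractor.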
+ zype_id = self._download_json(
+ 'https://api.zype.com/videos', display_id, query={
+ 'app_key': 'PUVKp9WgGUb3-JUw6EqafLx8tFVP6VKZTWbUOR-HOm__g4fNDt1bCsm_LgYf_k9H',
+ 'per_page': 1,
+ 'active': 'true',
+ 'friendly_title': display_id,
+ })['response'][0]['_id']
+ return self.url_result(self._ZYPE_TEMPL % zype_id, 'Zype', zype_id)
diff --git a/youtube_dl/extractor/trutv.py b/youtube_dlc/extractor/trutv.py
index ce892c8c5..ce892c8c5 100644
--- a/youtube_dl/extractor/trutv.py
+++ b/youtube_dlc/extractor/trutv.py
diff --git a/youtube_dl/extractor/tube8.py b/youtube_dlc/extractor/tube8.py
index db93b0182..db93b0182 100644
--- a/youtube_dl/extractor/tube8.py
+++ b/youtube_dlc/extractor/tube8.py
diff --git a/youtube_dl/extractor/tubitv.py b/youtube_dlc/extractor/tubitv.py
index a51fa6515..a51fa6515 100644
--- a/youtube_dl/extractor/tubitv.py
+++ b/youtube_dlc/extractor/tubitv.py
diff --git a/youtube_dl/extractor/tudou.py b/youtube_dlc/extractor/tudou.py
index 7421378a8..7421378a8 100644
--- a/youtube_dl/extractor/tudou.py
+++ b/youtube_dlc/extractor/tudou.py
diff --git a/youtube_dlc/extractor/tumblr.py b/youtube_dlc/extractor/tumblr.py
new file mode 100644
index 000000000..ae584ad69
--- /dev/null
+++ b/youtube_dlc/extractor/tumblr.py
@@ -0,0 +1,215 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ urlencode_postdata
+)
+
+
+class TumblrIE(InfoExtractor):
+ _VALID_URL = r'https?://(?P<blog_name>[^/?#&]+)\.tumblr\.com/(?:post|video)/(?P<id>[0-9]+)(?:$|[/?#])'
+ _NETRC_MACHINE = 'tumblr'
+ _LOGIN_URL = 'https://www.tumblr.com/login'
+ _TESTS = [{
+ 'url': 'http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes',
+ 'md5': '479bb068e5b16462f5176a6828829767',
+ 'info_dict': {
+ 'id': '54196191430',
+ 'ext': 'mp4',
+ 'title': 'tatiana maslany news, Orphan Black || DVD extra - behind the scenes ↳...',
+ 'description': 'md5:37db8211e40b50c7c44e95da14f630b7',
+ 'thumbnail': r're:http://.*\.jpg',
+ }
+ }, {
+ 'url': 'http://5sostrum.tumblr.com/post/90208453769/yall-forgetting-the-greatest-keek-of-them-all',
+ 'md5': 'bf348ef8c0ef84fbf1cbd6fa6e000359',
+ 'info_dict': {
+ 'id': '90208453769',
+ 'ext': 'mp4',
+ 'title': '5SOS STRUM ;]',
+ 'description': 'md5:dba62ac8639482759c8eb10ce474586a',
+ 'thumbnail': r're:http://.*\.jpg',
+ }
+ }, {
+ 'url': 'http://hdvideotest.tumblr.com/post/130323439814/test-description-for-my-hd-video',
+ 'md5': '7ae503065ad150122dc3089f8cf1546c',
+ 'info_dict': {
+ 'id': '130323439814',
+ 'ext': 'mp4',
+ 'title': 'HD Video Testing \u2014 Test description for my HD video',
+ 'description': 'md5:97cc3ab5fcd27ee4af6356701541319c',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ 'params': {
+ 'format': 'hd',
+ },
+ }, {
+ 'url': 'http://naked-yogi.tumblr.com/post/118312946248/naked-smoking-stretching',
+ 'md5': 'de07e5211d60d4f3a2c3df757ea9f6ab',
+ 'info_dict': {
+ 'id': 'Wmur',
+ 'ext': 'mp4',
+ 'title': 'naked smoking & stretching',
+ 'upload_date': '20150506',
+ 'timestamp': 1430931613,
+ 'age_limit': 18,
+ 'uploader_id': '1638622',
+ 'uploader': 'naked-yogi',
+ },
+ 'add_ie': ['Vidme'],
+ }, {
+ 'url': 'http://camdamage.tumblr.com/post/98846056295/',
+ 'md5': 'a9e0c8371ea1ca306d6554e3fecf50b6',
+ 'info_dict': {
+ 'id': '105463834',
+ 'ext': 'mp4',
+ 'title': 'Cam Damage-HD 720p',
+ 'uploader': 'John Moyer',
+ 'uploader_id': 'user32021558',
+ },
+ 'add_ie': ['Vimeo'],
+ }, {
+ 'url': 'http://sutiblr.tumblr.com/post/139638707273',
+ 'md5': '2dd184b3669e049ba40563a7d423f95c',
+ 'info_dict': {
+ 'id': 'ir7qBEIKqvq',
+ 'ext': 'mp4',
+ 'title': 'Vine by sutiblr',
+ 'alt_title': 'Vine by sutiblr',
+ 'uploader': 'sutiblr',
+ 'uploader_id': '1198993975374495744',
+ 'upload_date': '20160220',
+ 'like_count': int,
+ 'comment_count': int,
+ 'repost_count': int,
+ },
+ 'add_ie': ['Vine'],
+ }, {
+ 'url': 'http://vitasidorkina.tumblr.com/post/134652425014/joskriver-victoriassecret-invisibility-or',
+ 'md5': '01c12ceb82cbf6b2fe0703aa56b3ad72',
+ 'info_dict': {
+ 'id': '-7LnUPGlSo',
+ 'ext': 'mp4',
+ 'title': 'Video by victoriassecret',
+ 'description': 'Invisibility or flight…which superpower would YOU choose? #VSFashionShow #ThisOrThat',
+ 'uploader_id': 'victoriassecret',
+ 'thumbnail': r're:^https?://.*\.jpg'
+ },
+ 'add_ie': ['Instagram'],
+ }]
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+ login_form.update({
+ 'user[email]': username,
+ 'user[password]': password
+ })
+
+ response, urlh = self._download_webpage_handle(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': self._LOGIN_URL,
+ })
+
+ # Successful login
+ if '/dashboard' in urlh.geturl():
+ return
+
+ login_errors = self._parse_json(
+ self._search_regex(
+ r'RegistrationForm\.errors\s*=\s*(\[.+?\])\s*;', response,
+ 'login errors', default='[]'),
+ None, fatal=False)
+ if login_errors:
+ raise ExtractorError(
+ 'Unable to log in: %s' % login_errors[0], expected=True)
+
+ self.report_warning('Login has probably failed')
+
+ def _real_extract(self, url):
+ m_url = re.match(self._VALID_URL, url)
+ video_id = m_url.group('id')
+ blog = m_url.group('blog_name')
+
+ url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
+ webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ redirect_url = urlh.geturl()
+ if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'):
+ raise ExtractorError(
+ 'This Tumblr may contain sensitive media. '
+ 'Disable safe mode in your account settings '
+ 'at https://www.tumblr.com/settings/account#safe_mode',
+ expected=True)
+
+ iframe_url = self._search_regex(
+ r'src=\'(https?://www\.tumblr\.com/video/[^\']+)\'',
+ webpage, 'iframe url', default=None)
+ if iframe_url is None:
+ return self.url_result(redirect_url, 'Generic')
+
+ iframe = self._download_webpage(iframe_url, video_id, 'Downloading iframe page')
+
+ duration = None
+ sources = []
+
+ sd_url = self._search_regex(
+ r'<source[^>]+src=(["\'])(?P<url>.+?)\1', iframe,
+ 'sd video url', default=None, group='url')
+ if sd_url:
+ sources.append((sd_url, 'sd'))
+
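+ # The iframe's player markup carries a data-crt-options attribute whose
+ # JSON payload exposes the duration and, when present, an HD variant
+ # under hdUrl.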
+ options = self._parse_json(
+ self._search_regex(
+ r'data-crt-options=(["\'])(?P<options>.+?)\1', iframe,
+ 'hd video url', default='', group='options'),
+ video_id, fatal=False)
+ if options:
+ duration = int_or_none(options.get('duration'))
+ hd_url = options.get('hdUrl')
+ if hd_url:
+ sources.append((hd_url, 'hd'))
+
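+ # Tumblr video URLs end in the vertical resolution (e.g. .../480), which
+ # is reused as the format height; sources are appended SD first, so the
+ # enumerate index doubles as an ascending quality rank.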
+ formats = [{
+ 'url': video_url,
+ 'ext': 'mp4',
+ 'format_id': format_id,
+ 'height': int_or_none(self._search_regex(
+ r'/(\d{3,4})$', video_url, 'height', default=None)),
+ 'quality': quality,
+ } for quality, (video_url, format_id) in enumerate(sources)]
+
+ self._sort_formats(formats)
+
+ # The <title> tag is the only place a title can reliably be found; it
+ # may be incomplete, but searching elsewhere doesn't work for all videos
+ video_title = self._html_search_regex(
+ r'(?s)<title>(?P<title>.*?)(?: \| Tumblr)?</title>',
+ webpage, 'title')
+
+ return {
+ 'id': video_id,
+ 'title': video_title,
+ 'description': self._og_search_description(webpage, default=None),
+ 'thumbnail': self._og_search_thumbnail(webpage, default=None),
+ 'duration': duration,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/tunein.py b/youtube_dlc/extractor/tunein.py
index c7a5f5a63..c7a5f5a63 100644
--- a/youtube_dl/extractor/tunein.py
+++ b/youtube_dlc/extractor/tunein.py
diff --git a/youtube_dl/extractor/tunepk.py b/youtube_dlc/extractor/tunepk.py
index 9d42651ce..9d42651ce 100644
--- a/youtube_dl/extractor/tunepk.py
+++ b/youtube_dlc/extractor/tunepk.py
diff --git a/youtube_dl/extractor/turbo.py b/youtube_dlc/extractor/turbo.py
index be3eaa5c2..be3eaa5c2 100644
--- a/youtube_dl/extractor/turbo.py
+++ b/youtube_dlc/extractor/turbo.py
diff --git a/youtube_dl/extractor/turner.py b/youtube_dlc/extractor/turner.py
index 4a6cbfbb8..4a6cbfbb8 100644
--- a/youtube_dl/extractor/turner.py
+++ b/youtube_dlc/extractor/turner.py
diff --git a/youtube_dlc/extractor/tv2.py b/youtube_dlc/extractor/tv2.py
new file mode 100644
index 000000000..4a19b9be6
--- /dev/null
+++ b/youtube_dlc/extractor/tv2.py
@@ -0,0 +1,192 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ js_to_json,
+ parse_iso8601,
+ remove_end,
+ strip_or_none,
+ try_get,
+)
+
+
+class TV2IE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tv2\.no/v/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.tv2.no/v/916509/',
+ 'info_dict': {
+ 'id': '916509',
+ 'ext': 'flv',
+ 'title': 'Se Frode Gryttens hyllest av Steven Gerrard',
+ 'description': 'TV 2 Sportens huspoet tar avskjed med Liverpools kaptein Steven Gerrard.',
+ 'timestamp': 1431715610,
+ 'upload_date': '20150515',
+ 'duration': 156.967,
+ 'view_count': int,
+ 'categories': list,
+ },
+ }
+ _API_DOMAIN = 'sumo.tv2.no'
+ _PROTOCOLS = ('HDS', 'HLS', 'DASH')
+ _GEO_COUNTRIES = ['NO']
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ api_base = 'http://%s/api/web/asset/%s' % (self._API_DOMAIN, video_id)
+
+ formats = []
+ format_urls = []
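+ # Try each protocol in turn; a 401 response carries a JSON body whose
+ # error code distinguishes geo blocks from missing authentication.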
+ for protocol in self._PROTOCOLS:
+ try:
+ data = self._download_json(
+ api_base + '/play.json?protocol=%s&videoFormat=SMIL+ISMUSP' % protocol,
+ video_id, 'Downloading play JSON')['playback']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ error = self._parse_json(e.cause.read().decode(), video_id)['error']
+ error_code = error.get('code')
+ if error_code == 'ASSET_PLAYBACK_INVALID_GEO_LOCATION':
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+ elif error_code == 'SESSION_NOT_AUTHENTICATED':
+ self.raise_login_required()
+ raise ExtractorError(error['description'])
+ raise
+ items = try_get(data, lambda x: x['items']['item'])
+ if not items:
+ continue
+ if not isinstance(items, list):
+ items = [items]
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ video_url = item.get('url')
+ if not video_url or video_url in format_urls:
+ continue
+ format_id = '%s-%s' % (protocol.lower(), item.get('mediaFormat'))
+ if not self._is_valid_url(video_url, video_id, format_id):
+ continue
+ format_urls.append(video_url)
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ video_url, video_id, f4m_id=format_id, fatal=False))
+ elif ext == 'm3u8':
+ if not data.get('drmProtected'):
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id=format_id, fatal=False))
+ elif ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ video_url, video_id, format_id, fatal=False))
+ elif ext == 'ism' or video_url.endswith('.ism/Manifest'):
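+ # ISM/SmoothStreaming manifests are skipped, presumably because they
+ # only duplicate the other protocols' streams here.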
+ pass
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': format_id,
+ 'tbr': int_or_none(item.get('bitrate')),
+ 'filesize': int_or_none(item.get('fileSize')),
+ })
+ if not formats and data.get('drmProtected'):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+ self._sort_formats(formats)
+
+ asset = self._download_json(
+ api_base + '.json', video_id,
+ 'Downloading metadata JSON')['asset']
+ title = asset['title']
+
+ thumbnails = [{
+ 'id': thumbnail.get('@type'),
+ 'url': thumbnail.get('url'),
+ } for _, thumbnail in (asset.get('imageVersions') or {}).items()]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': strip_or_none(asset.get('description')),
+ 'thumbnails': thumbnails,
+ 'timestamp': parse_iso8601(asset.get('createTime')),
+ 'duration': float_or_none(asset.get('accurateDuration') or asset.get('duration')),
+ 'view_count': int_or_none(asset.get('views')),
+ 'categories': asset.get('keywords', '').split(','),
+ 'formats': formats,
+ }
+
+
+class TV2ArticleIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?tv2\.no/(?:a|\d{4}/\d{2}/\d{2}(/[^/]+)+)/(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'http://www.tv2.no/2015/05/16/nyheter/alesund/krim/pingvin/6930542',
+ 'info_dict': {
+ 'id': '6930542',
+ 'title': 'Russen hetses etter pingvintyveri - innrømmer å ha åpnet luken på buret',
+ 'description': 'De fire siktede nekter fortsatt for å ha stjålet pingvinbabyene, men innrømmer å ha åpnet luken til de små kyllingene.',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'http://www.tv2.no/a/6930542',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, playlist_id)
+
+ # Old embed pattern (looks unused nowadays)
+ assets = re.findall(r'data-assetid=["\'](\d+)', webpage)
+
+ if not assets:
+ # New embed pattern
+ for v in re.findall(r'(?s)TV2ContentboxVideo\(({.+?})\)', webpage):
+ video = self._parse_json(
+ v, playlist_id, transform_source=js_to_json, fatal=False)
+ if not video:
+ continue
+ asset = video.get('assetId')
+ if asset:
+ assets.append(asset)
+
+ entries = [
+ self.url_result('http://www.tv2.no/v/%s' % asset_id, 'TV2')
+ for asset_id in assets]
+
+ title = remove_end(self._og_search_title(webpage), ' - TV2.no')
+ description = remove_end(self._og_search_description(webpage), ' - TV2.no')
+
+ return self.playlist_result(entries, playlist_id, title, description)
+
+
+class KatsomoIE(TV2IE):
+ _VALID_URL = r'https?://(?:www\.)?(?:katsomo|mtv)\.fi/(?:#!/)?(?:[^/]+/[0-9a-z-]+-\d+/[0-9a-z-]+-|[^/]+/\d+/[^/]+/)(?P<id>\d+)'
+ _TEST = {
+ 'url': 'https://www.mtv.fi/sarja/mtv-uutiset-live-33001002003/lahden-pelicans-teki-kovan-ratkaisun-ville-nieminen-pihalle-1181321',
+ 'info_dict': {
+ 'id': '1181321',
+ 'ext': 'mp4',
+ 'title': 'MTV Uutiset Live',
+ 'description': 'Päätöksen teki Pelicansin hallitus.',
+ 'timestamp': 1575116484,
+ 'upload_date': '20191130',
+ 'duration': 37.12,
+ 'view_count': int,
+ 'categories': list,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+ _API_DOMAIN = 'api.katsomo.fi'
+ _PROTOCOLS = ('HLS', 'MPD')
+ _GEO_COUNTRIES = ['FI']
diff --git a/youtube_dlc/extractor/tv2dk.py b/youtube_dlc/extractor/tv2dk.py
new file mode 100644
index 000000000..8bda9348d
--- /dev/null
+++ b/youtube_dlc/extractor/tv2dk.py
@@ -0,0 +1,154 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ js_to_json,
+ url_or_none,
+)
+
+
+class TV2DKIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ (?:
+ tvsyd|
+ tv2ostjylland|
+ tvmidtvest|
+ tv2fyn|
+ tv2east|
+ tv2lorry|
+ tv2nord
+ )\.dk/
+ (?:[^/]+/)*
+ (?P<id>[^/?\#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.tvsyd.dk/nyheder/28-10-2019/1930/1930-28-okt-2019?autoplay=1#player',
+ 'info_dict': {
+ 'id': '0_52jmwa0p',
+ 'ext': 'mp4',
+ 'title': '19:30 - 28. okt. 2019',
+ 'timestamp': 1572290248,
+ 'upload_date': '20191028',
+ 'uploader_id': 'tvsyd',
+ 'duration': 1347,
+ 'view_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['Kaltura'],
+ }, {
+ 'url': 'https://www.tv2ostjylland.dk/artikel/minister-gaar-ind-i-sag-om-diabetes-teknologi',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2ostjylland.dk/nyheder/28-10-2019/22/2200-nyhederne-mandag-d-28-oktober-2019?autoplay=1#player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvmidtvest.dk/nyheder/27-10-2019/1930/1930-27-okt-2019',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2fyn.dk/artikel/fyn-kan-faa-landets-foerste-fabrik-til-groent-jetbraendstof',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2east.dk/artikel/gods-faar-indleveret-tonsvis-af-aebler-100-kilo-aebler-gaar-til-en-aeblebrandy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2lorry.dk/koebenhavn/rasmus-paludan-evakueret-til-egen-demonstration#player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tv2nord.dk/artikel/dybt-uacceptabelt',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
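+ # Each embedded player element carries Kaltura data-entryid and
+ # data-partnerid attributes; every complete pair is delegated to the
+ # Kaltura extractor.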
+ entries = []
+ for video_el in re.findall(r'(?s)<[^>]+\bdata-entryid\s*=[^>]*>', webpage):
+ video = extract_attributes(video_el)
+ kaltura_id = video.get('data-entryid')
+ if not kaltura_id:
+ continue
+ partner_id = video.get('data-partnerid')
+ if not partner_id:
+ continue
+ entries.append(self.url_result(
+ 'kaltura:%s:%s' % (partner_id, kaltura_id), 'Kaltura',
+ video_id=kaltura_id))
+ return self.playlist_result(entries)
+
+
+class TV2DKBornholmPlayIE(InfoExtractor):
+ _VALID_URL = r'https?://play\.tv2bornholm\.dk/\?.*?\bid=(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://play.tv2bornholm.dk/?area=specifikTV&id=781021',
+ 'info_dict': {
+ 'id': '781021',
+ 'ext': 'mp4',
+ 'title': '12Nyheder-27.11.19',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._download_json(
+ 'https://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id,
+ data=json.dumps({
+ 'playlist_id': video_id,
+ 'serienavn': '',
+ }).encode(), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Content-Type': 'application/json; charset=UTF-8',
+ })['d']
+
+ # TODO: generalize flowplayer
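+ # The AJAX endpoint returns a JavaScript flowplayer() setup as a string,
+ # so the title and the sources array are pulled out with regexes.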
+ title = self._search_regex(
+ r'title\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', video, 'title',
+ group='value')
+ sources = self._parse_json(self._search_regex(
+ r'(?s)sources:\s*(\[.+?\]),', video, 'sources'),
+ video_id, js_to_json)
+
+ formats = []
+ srcs = set()
+ for source in sources:
+ src = url_or_none(source.get('src'))
+ if not src:
+ continue
+ if src in srcs:
+ continue
+ srcs.add(src)
+ ext = determine_ext(src)
+ src_type = source.get('type')
+ if src_type == 'application/x-mpegurl' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ src, video_id, ext='mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ elif src_type == 'application/dash+xml' or ext == 'mpd':
+ formats.extend(self._extract_mpd_formats(
+ src, video_id, mpd_id='dash', fatal=False))
+ else:
+ formats.append({
+ 'url': src,
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/tv2hu.py b/youtube_dlc/extractor/tv2hu.py
index 86017b757..86017b757 100644
--- a/youtube_dl/extractor/tv2hu.py
+++ b/youtube_dlc/extractor/tv2hu.py
diff --git a/youtube_dlc/extractor/tv4.py b/youtube_dlc/extractor/tv4.py
new file mode 100644
index 000000000..c498b0191
--- /dev/null
+++ b/youtube_dlc/extractor/tv4.py
@@ -0,0 +1,124 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ parse_iso8601,
+)
+
+
+class TV4IE(InfoExtractor):
+ IE_DESC = 'tv4.se and tv4play.se'
+ _VALID_URL = r'''(?x)https?://(?:www\.)?
+ (?:
+ tv4\.se/(?:[^/]+)/klipp/(?:.*)-|
+ tv4play\.se/
+ (?:
+ (?:program|barn)/(?:[^/]+/|(?:[^\?]+)\?video_id=)|
+ iframe/video/|
+ film/|
+ sport/|
+ )
+ )(?P<id>[0-9]+)'''
+ _GEO_COUNTRIES = ['SE']
+ _TESTS = [
+ {
+ 'url': 'http://www.tv4.se/kalla-fakta/klipp/kalla-fakta-5-english-subtitles-2491650',
+ 'md5': 'cb837212f342d77cec06e6dad190e96d',
+ 'info_dict': {
+ 'id': '2491650',
+ 'ext': 'mp4',
+ 'title': 'Kalla Fakta 5 (english subtitles)',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': int,
+ 'upload_date': '20131125',
+ },
+ },
+ {
+ 'url': 'http://www.tv4play.se/iframe/video/3054113',
+ 'md5': 'cb837212f342d77cec06e6dad190e96d',
+ 'info_dict': {
+ 'id': '3054113',
+ 'ext': 'mp4',
+ 'title': 'Så här jobbar ficktjuvarna - se avslöjande bilder',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Unika bilder avslöjar hur turisternas fickor vittjas mitt på Stockholms central. Två experter på ficktjuvarna avslöjar knepen du ska se upp för.',
+ 'timestamp': int,
+ 'upload_date': '20150130',
+ },
+ },
+ {
+ 'url': 'http://www.tv4play.se/sport/3060959',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/film/2378136',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/barn/looney-tunes?video_id=3062412',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.tv4play.se/program/farang/3922081',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ info = self._download_json(
+ 'https://playback-api.b17g.net/asset/%s' % video_id,
+ video_id, 'Downloading video info JSON', query={
+ 'service': 'tv4',
+ 'device': 'browser',
+ 'protocol': 'hls,dash',
+ 'drm': 'widevine',
+ })['metadata']
+
+ title = info['title']
+
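+ # The media endpoint only returns an HLS manifest URL; DASH, HDS and
+ # SmoothStreaming variants are derived below by rewriting its extension.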
+ manifest_url = self._download_json(
+ 'https://playback-api.b17g.net/media/' + video_id,
+ video_id, query={
+ 'service': 'tv4',
+ 'device': 'browser',
+ 'protocol': 'hls',
+ })['playbackItem']['manifestUrl']
+ formats = self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ formats.extend(self._extract_mpd_formats(
+ manifest_url.replace('.m3u8', '.mpd'),
+ video_id, mpd_id='dash', fatal=False))
+ formats.extend(self._extract_f4m_formats(
+ manifest_url.replace('.m3u8', '.f4m'),
+ video_id, f4m_id='hds', fatal=False))
+ formats.extend(self._extract_ism_formats(
+ re.sub(r'\.ism/.*?\.m3u8', r'.ism/Manifest', manifest_url),
+ video_id, ism_id='mss', fatal=False))
+
+ if not formats and info.get('is_geo_restricted'):
+ self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'formats': formats,
+ # 'subtitles': subtitles,
+ 'description': info.get('description'),
+ 'timestamp': parse_iso8601(info.get('broadcast_date_time')),
+ 'duration': int_or_none(info.get('duration')),
+ 'thumbnail': info.get('image'),
+ 'is_live': info.get('isLive') is True,
+ 'series': info.get('seriesTitle'),
+ 'season_number': int_or_none(info.get('seasonNumber')),
+ 'episode': info.get('episodeTitle'),
+ 'episode_number': int_or_none(info.get('episodeNumber')),
+ }
diff --git a/youtube_dlc/extractor/tv5mondeplus.py b/youtube_dlc/extractor/tv5mondeplus.py
new file mode 100644
index 000000000..b7fe082b9
--- /dev/null
+++ b/youtube_dlc/extractor/tv5mondeplus.py
@@ -0,0 +1,117 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ int_or_none,
+ parse_duration,
+)
+
+
+class TV5MondePlusIE(InfoExtractor):
+ IE_DESC = 'TV5MONDE+'
+ _VALID_URL = r'https?://(?:www\.)?(?:tv5mondeplus|revoir\.tv5monde)\.com/toutes-les-videos/[^/]+/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # movie
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/cinema/rendez-vous-a-atlit',
+ 'md5': '8cbde5ea7b296cf635073e27895e227f',
+ 'info_dict': {
+ 'id': '822a4756-0712-7329-1859-a13ac7fd1407',
+ 'display_id': 'rendez-vous-a-atlit',
+ 'ext': 'mp4',
+ 'title': 'Rendez-vous à Atlit',
+ 'description': 'md5:2893a4c5e1dbac3eedff2d87956e4efb',
+ 'upload_date': '20200130',
+ },
+ }, {
+ # series episode
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/c-est-la-vie-ennemie-juree',
+ 'info_dict': {
+ 'id': '0df7007c-4900-3936-c601-87a13a93a068',
+ 'display_id': 'c-est-la-vie-ennemie-juree',
+ 'ext': 'mp4',
+ 'title': "C'est la vie - Ennemie jurée",
+ 'description': 'md5:dfb5c63087b6f35fe0cc0af4fe44287e',
+ 'upload_date': '20200130',
+ 'series': "C'est la vie",
+ 'episode': 'Ennemie jurée',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/series-fictions/neuf-jours-en-hiver-neuf-jours-en-hiver',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://revoir.tv5monde.com/toutes-les-videos/info-societe/le-journal-de-la-rts-edition-du-30-01-20-19h30',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+ webpage = self._download_webpage(url, display_id)
+
+ if ">Ce programme n'est malheureusement pas disponible pour votre zone géographique.<" in webpage:
+ self.raise_geo_restricted(countries=['FR'])
+
+ title = episode = self._html_search_regex(r'<h1>([^<]+)', webpage, 'title')
+ vpl_data = extract_attributes(self._search_regex(
+ r'(<[^>]+class="video_player_loader"[^>]+>)',
+ webpage, 'video player loader'))
+
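+ # The player loader element's data-broadcast attribute holds a JSON
+ # object whose "files" list enumerates the available streams.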
+ video_files = self._parse_json(
+ vpl_data['data-broadcast'], display_id).get('files', [])
+ formats = []
+ for video_file in video_files:
+ v_url = video_file.get('url')
+ if not v_url:
+ continue
+ video_format = video_file.get('format') or determine_ext(v_url)
+ if video_format == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ v_url, display_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': v_url,
+ 'format_id': video_format,
+ })
+ self._sort_formats(formats)
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']episode-texte[^>]+>(.+?)</div>', webpage,
+ 'description', fatal=False)
+
+ series = self._html_search_regex(
+ r'<p[^>]+class=["\']episode-emission[^>]+>([^<]+)', webpage,
+ 'series', default=None)
+
+ if series and series != title:
+ title = '%s - %s' % (series, title)
+
+ upload_date = self._search_regex(
+ r'(?:date_publication|publish_date)["\']\s*:\s*["\'](\d{4}_\d{2}_\d{2})',
+ webpage, 'upload date', default=None)
+ if upload_date:
+ upload_date = upload_date.replace('_', '')
+
+ video_id = self._search_regex(
+ (r'data-guid=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})',
+ r'id_contenu["\']\s*:\s*(\d+)'), webpage, 'video id',
+ default=display_id)
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': vpl_data.get('data-image'),
+ 'duration': int_or_none(vpl_data.get('data-duration')) or parse_duration(self._html_search_meta('duration', webpage)),
+ 'upload_date': upload_date,
+ 'formats': formats,
+ 'series': series,
+ 'episode': episode,
+ }
diff --git a/youtube_dlc/extractor/tva.py b/youtube_dlc/extractor/tva.py
new file mode 100644
index 000000000..443f46e8a
--- /dev/null
+++ b/youtube_dlc/extractor/tva.py
@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import (
+ float_or_none,
+ smuggle_url,
+)
+
+
+class TVAIE(InfoExtractor):
+ _VALID_URL = r'https?://videos?\.tva\.ca/details/_(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://videos.tva.ca/details/_5596811470001',
+ 'info_dict': {
+ 'id': '5596811470001',
+ 'ext': 'mp4',
+ 'title': 'Un extrait de l\'épisode du dimanche 8 octobre 2017 !',
+ 'uploader_id': '5481942443001',
+ 'upload_date': '20171003',
+ 'timestamp': 1507064617,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ }
+ }, {
+ 'url': 'https://video.tva.ca/details/_5596811470001',
+ 'only_matching': True,
+ }]
+ BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5481942443001/default_default/index.html?videoId=%s'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={
+ 'Accept': 'application/json',
+ }, query={
+ 'appId': '5955fc5f23eec60006c951f1',
+ })
+
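+ # The proxy API returns metadata as a flat list of
+ # {'key': ..., 'value': ...} pairs.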
+ def get_attribute(key):
+ for attribute in video_data.get('attributes', []):
+ if attribute.get('key') == key:
+ return attribute.get('value')
+ return None
+
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': get_attribute('title'),
+ 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}),
+ 'description': get_attribute('description'),
+ 'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'),
+ 'duration': float_or_none(get_attribute('video-duration'), 1000),
+ 'ie_key': 'BrightcoveNew',
+ }
diff --git a/youtube_dl/extractor/tvanouvelles.py b/youtube_dlc/extractor/tvanouvelles.py
index 1086176a2..1086176a2 100644
--- a/youtube_dl/extractor/tvanouvelles.py
+++ b/youtube_dlc/extractor/tvanouvelles.py
diff --git a/youtube_dl/extractor/tvc.py b/youtube_dlc/extractor/tvc.py
index 008f64cc2..008f64cc2 100644
--- a/youtube_dl/extractor/tvc.py
+++ b/youtube_dlc/extractor/tvc.py
diff --git a/youtube_dl/extractor/tvigle.py b/youtube_dlc/extractor/tvigle.py
index 180259aba..180259aba 100644
--- a/youtube_dl/extractor/tvigle.py
+++ b/youtube_dlc/extractor/tvigle.py
diff --git a/youtube_dl/extractor/tvland.py b/youtube_dlc/extractor/tvland.py
index 791144128..791144128 100644
--- a/youtube_dl/extractor/tvland.py
+++ b/youtube_dlc/extractor/tvland.py
diff --git a/youtube_dl/extractor/tvn24.py b/youtube_dlc/extractor/tvn24.py
index de0fb5063..de0fb5063 100644
--- a/youtube_dl/extractor/tvn24.py
+++ b/youtube_dlc/extractor/tvn24.py
diff --git a/youtube_dl/extractor/tvnet.py b/youtube_dlc/extractor/tvnet.py
index 4222ff9ee..4222ff9ee 100644
--- a/youtube_dl/extractor/tvnet.py
+++ b/youtube_dlc/extractor/tvnet.py
diff --git a/youtube_dl/extractor/tvnoe.py b/youtube_dlc/extractor/tvnoe.py
index 26a5aeae4..26a5aeae4 100644
--- a/youtube_dl/extractor/tvnoe.py
+++ b/youtube_dlc/extractor/tvnoe.py
diff --git a/youtube_dlc/extractor/tvnow.py b/youtube_dlc/extractor/tvnow.py
new file mode 100644
index 000000000..e2bb62ae8
--- /dev/null
+++ b/youtube_dlc/extractor/tvnow.py
@@ -0,0 +1,644 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ get_element_by_id,
+ int_or_none,
+ parse_iso8601,
+ parse_duration,
+ str_or_none,
+ try_get,
+ update_url_query,
+ urljoin,
+)
+
+
+class TVNowBaseIE(InfoExtractor):
+ _VIDEO_FIELDS = (
+ 'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
+ 'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
+ 'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
+ 'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')
+
+ def _call_api(self, path, video_id, query):
+ return self._download_json(
+ 'https://api.tvnow.de/v3/' + path, video_id, query=query)
+
+ def _extract_video(self, info, display_id):
+ video_id = compat_str(info['id'])
+ title = info['title']
+
+ paths = []
+ for manifest_url in (info.get('manifest') or {}).values():
+ if not manifest_url:
+ continue
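+ # An empty "filter" query parameter presumably disables server-side
+ # format filtering so that every quality is listed.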
+ manifest_url = update_url_query(manifest_url, {'filter': ''})
+ path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
+ if path in paths:
+ continue
+ paths.append(path)
+
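+ # All manifests share a single ISM path; url_repl swaps the protocol
+ # token and manifest suffix, and make_urls additionally tries a
+ # /ngvod/ variant of each URL when it differs.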
+ def url_repl(proto, suffix):
+ return re.sub(
+ r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
+ r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
+ '.ism/' + suffix, manifest_url))
+
+ def make_urls(proto, suffix):
+ urls = [url_repl(proto, suffix)]
+ hd_url = urls[0].replace('/manifest/', '/ngvod/')
+ if hd_url != urls[0]:
+ urls.append(hd_url)
+ return urls
+
+ # Accumulate formats across all derived URLs instead of overwriting them.
+ formats = []
+ for man_url in make_urls('dash', '.mpd'):
+ formats.extend(self._extract_mpd_formats(
+ man_url, video_id, mpd_id='dash', fatal=False))
+ for man_url in make_urls('hss', 'Manifest'):
+ formats.extend(self._extract_ism_formats(
+ man_url, video_id, ism_id='mss', fatal=False))
+ for man_url in make_urls('hls', '.m3u8'):
+ formats.extend(self._extract_m3u8_formats(
+ man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
+ fatal=False))
+ if formats:
+ break
+ else:
+ if info.get('isDrm'):
+ raise ExtractorError(
+ 'Video %s is DRM protected' % video_id, expected=True)
+ if info.get('geoblocked'):
+ self.raise_geo_restricted()
+ if not info.get('free', True):
+ raise ExtractorError(
+ 'Video %s is not available for free' % video_id, expected=True)
+ self._sort_formats(formats)
+
+ description = info.get('articleLong') or info.get('articleShort')
+ timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
+ duration = parse_duration(info.get('duration'))
+
+ f = info.get('format', {})
+
+ thumbnails = [{
+ 'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
+ }]
+ thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
+ if thumbnail:
+ thumbnails.append({
+ 'url': thumbnail,
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'series': f.get('title'),
+ 'season_number': int_or_none(info.get('season')),
+ 'episode_number': int_or_none(info.get('episode')),
+ 'episode': title,
+ 'formats': formats,
+ }
+
+
+class TVNowIE(TVNowBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
+ (?P<show_id>[^/]+)/
+ (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
+ '''
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url) or TVNowShowIE.suitable(url)
+ else super(TVNowIE, cls).suitable(url))
+
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
+ 'info_dict': {
+ 'id': '331082',
+ 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
+ 'ext': 'mp4',
+ 'title': 'Der neue Porsche 911 GT 3',
+ 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
+ 'timestamp': 1495994400,
+ 'upload_date': '20170528',
+ 'duration': 5283,
+ 'series': 'GRIP - Das Motormagazin',
+ 'season_number': 14,
+ 'episode_number': 405,
+ 'episode': 'Der neue Porsche 911 GT 3',
+ },
+ }, {
+ # rtl2
+ 'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
+ 'only_matching': True,
+ }, {
+ # rtlnitro
+ 'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
+ 'only_matching': True,
+ }, {
+ # superrtl
+ 'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
+ 'only_matching': True,
+ }, {
+ # ntv
+ 'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
+ 'only_matching': True,
+ }, {
+ # vox
+ 'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
+ 'only_matching': True,
+ }, {
+ # rtlplus
+ 'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
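+ # Groups 2 and 3 of _VALID_URL are the show and episode slugs, which
+ # together form the movie path expected by the old API.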
+ display_id = '%s/%s' % mobj.group(2, 3)
+
+ info = self._call_api(
+ 'movies/' + display_id, display_id, query={
+ 'fields': ','.join(self._VIDEO_FIELDS),
+ })
+
+ return self._extract_video(info, display_id)
+
+
+class TVNowNewIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?P<base_url>https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:shows|serien))/
+ (?P<show>[^/]+)-\d+/
+ [^/]+/
+ episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ base_url = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
+ show, episode = mobj.group('show', 'episode')
+ return self.url_result(
+ # Rewrite new URLs to the old format and use extraction via old API
+ # at api.tvnow.de as a loophole for bypassing premium content checks
+ '%s/%s/%s' % (base_url, show, episode),
+ ie=TVNowIE.ie_key(), video_id=mobj.group('id'))
+
+
+class TVNowFilmIE(TVNowBaseIE):
+ _VALID_URL = r'''(?x)
+ (?P<base_url>https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:filme))/
+ (?P<title>[^/?$&]+)-(?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959',
+ 'info_dict': {
+ 'id': '1426690',
+ 'display_id': 'lord-of-war-haendler-des-todes',
+ 'ext': 'mp4',
+ 'title': 'Lord of War',
+ 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9',
+ 'timestamp': 1550010000,
+ 'upload_date': '20190212',
+ 'duration': 7016,
+ },
+ }, {
+ 'url': 'https://www.tvnow.de/filme/the-machinist-12157',
+ 'info_dict': {
+ 'id': '328160',
+ 'display_id': 'the-machinist',
+ 'ext': 'mp4',
+ 'title': 'The Machinist',
+ 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28',
+ 'timestamp': 1496469720,
+ 'upload_date': '20170603',
+ 'duration': 5836,
+ },
+ }, {
+ 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777',
+ 'only_matching': True, # DRM protected
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('title')
+
+ webpage = self._download_webpage(url, display_id, fatal=False)
+ if not webpage:
+ raise ExtractorError('Cannot download "%s"' % url, expected=True)
+
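+ # The page embeds its application state as JSON inside the
+ # #now-web-state element, with double quotes encoded as "&q;".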
+ json_text = get_element_by_id('now-web-state', webpage)
+ if not json_text:
+ raise ExtractorError('Cannot read video data', expected=True)
+
+ json_data = self._parse_json(
+ json_text,
+ display_id,
+ transform_source=lambda x: x.replace('&q;', '"'),
+ fatal=False)
+ if not json_data:
+ raise ExtractorError('Cannot read video data', expected=True)
+
+ player_key = next(
+ (key for key in json_data.keys() if 'module/player' in key),
+ None)
+ page_key = next(
+ (key for key in json_data.keys() if 'page/filme' in key),
+ None)
+ movie_id = try_get(
+ json_data,
+ [
+ lambda x: x[player_key]['body']['id'],
+ lambda x: x[page_key]['body']['modules'][0]['id'],
+ lambda x: x[page_key]['body']['modules'][1]['id']],
+ int)
+ if not movie_id:
+ raise ExtractorError('Cannot extract movie ID', expected=True)
+
+ info = self._call_api(
+ 'movies/%d' % movie_id,
+ display_id,
+ query={'fields': ','.join(self._VIDEO_FIELDS)})
+
+ return self._extract_video(info, display_id)
+
+
+class TVNowNewBaseIE(InfoExtractor):
+ def _call_api(self, path, video_id, query={}):
+ result = self._download_json(
+ 'https://apigw.tvnow.de/module/' + path, video_id, query=query)
+ error = result.get('error')
+ if error:
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, error), expected=True)
+ return result
+
+
+r"""
+TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
+when api.tvnow.de is shut down. This version can't bypass premium checks though.
+class TVNowIE(TVNowNewBaseIE):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:shows|serien)/[^/]+/
+ (?:[^/]+/)+
+ (?P<display_id>[^/?$&]+)-(?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ # episode with annual navigation
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
+ 'info_dict': {
+ 'id': '331082',
+ 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
+ 'ext': 'mp4',
+ 'title': 'Der neue Porsche 911 GT 3',
+ 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'timestamp': 1495994400,
+ 'upload_date': '20170528',
+ 'duration': 5283,
+ 'series': 'GRIP - Das Motormagazin',
+ 'season_number': 14,
+ 'episode_number': 405,
+ 'episode': 'Der neue Porsche 911 GT 3',
+ },
+ }, {
+ # rtl2, episode with season navigation
+ 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
+ 'only_matching': True,
+ }, {
+ # rtlnitro
+ 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
+ 'only_matching': True,
+ }, {
+ # superrtl
+ 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
+ 'only_matching': True,
+ }, {
+ # ntv
+ 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
+ 'only_matching': True,
+ }, {
+ # vox
+ 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
+ 'only_matching': True,
+ }]
+
+ def _extract_video(self, info, url, display_id):
+ config = info['config']
+ source = config['source']
+
+ video_id = compat_str(info.get('id') or source['videoId'])
+ title = source['title'].strip()
+
+ paths = []
+ for manifest_url in (info.get('manifest') or {}).values():
+ if not manifest_url:
+ continue
+ manifest_url = update_url_query(manifest_url, {'filter': ''})
+ path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
+ if path in paths:
+ continue
+ paths.append(path)
+
+ def url_repl(proto, suffix):
+ return re.sub(
+ r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
+ r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
+ '.ism/' + suffix, manifest_url))
+
+ formats = self._extract_mpd_formats(
+ url_repl('dash', '.mpd'), video_id,
+ mpd_id='dash', fatal=False)
+ formats.extend(self._extract_ism_formats(
+ url_repl('hss', 'Manifest'),
+ video_id, ism_id='mss', fatal=False))
+ formats.extend(self._extract_m3u8_formats(
+ url_repl('hls', '.m3u8'), video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+ if formats:
+ break
+ else:
+ if try_get(info, lambda x: x['rights']['isDrm']):
+ raise ExtractorError(
+ 'Video %s is DRM protected' % video_id, expected=True)
+ if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
+ self.raise_geo_restricted()
+ if not info.get('free', True):
+ raise ExtractorError(
+ 'Video %s is not available for free' % video_id, expected=True)
+ self._sort_formats(formats)
+
+ description = source.get('description')
+ thumbnail = url_or_none(source.get('poster'))
+ timestamp = unified_timestamp(source.get('previewStart'))
+ duration = parse_duration(source.get('length'))
+
+ series = source.get('format')
+ season_number = int_or_none(self._search_regex(
+ r'staffel-(\d+)', url, 'season number', default=None))
+ episode_number = int_or_none(self._search_regex(
+ r'episode-(\d+)', url, 'episode number', default=None))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'timestamp': timestamp,
+ 'duration': duration,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'episode': title,
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ display_id, video_id = re.match(self._VALID_URL, url).groups()
+ info = self._call_api('player/' + video_id, video_id)
+ return self._extract_video(info, video_id, display_id)
+
+
+class TVNowFilmIE(TVNowIE):
+ _VALID_URL = r'''(?x)
+ (?P<base_url>https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/
+ (?:filme))/
+ (?P<title>[^/?$&]+)-(?P<id>\d+)
+ '''
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959',
+ 'info_dict': {
+ 'id': '1426690',
+ 'display_id': 'lord-of-war-haendler-des-todes',
+ 'ext': 'mp4',
+ 'title': 'Lord of War',
+ 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9',
+ 'timestamp': 1550010000,
+ 'upload_date': '20190212',
+ 'duration': 7016,
+ },
+ }, {
+ 'url': 'https://www.tvnow.de/filme/the-machinist-12157',
+ 'info_dict': {
+ 'id': '328160',
+ 'display_id': 'the-machinist',
+ 'ext': 'mp4',
+ 'title': 'The Machinist',
+ 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28',
+ 'timestamp': 1496469720,
+ 'upload_date': '20170603',
+ 'duration': 5836,
+ },
+ }, {
+ 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777',
+ 'only_matching': True, # DRM protected
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ display_id = mobj.group('title')
+
+ webpage = self._download_webpage(url, display_id, fatal=False)
+ if not webpage:
+ raise ExtractorError('Cannot download "%s"' % url, expected=True)
+
+ json_text = get_element_by_id('now-web-state', webpage)
+ if not json_text:
+ raise ExtractorError('Cannot read video data', expected=True)
+
+ json_data = self._parse_json(
+ json_text,
+ display_id,
+ transform_source=lambda x: x.replace('&q;', '"'),
+ fatal=False)
+ if not json_data:
+ raise ExtractorError('Cannot read video data', expected=True)
+
+ player_key = next(
+ (key for key in json_data.keys() if 'module/player' in key),
+ None)
+ page_key = next(
+ (key for key in json_data.keys() if 'page/filme' in key),
+ None)
+ movie_id = try_get(
+ json_data,
+ [
+ lambda x: x[player_key]['body']['id'],
+ lambda x: x[page_key]['body']['modules'][0]['id'],
+ lambda x: x[page_key]['body']['modules'][1]['id']],
+ int)
+ if not movie_id:
+ raise ExtractorError('Cannot extract movie ID', expected=True)
+
+ info = self._call_api('player/%d' % movie_id, display_id)
+ return self._extract_video(info, url, display_id)
+"""
+
+
+class TVNowListBaseIE(TVNowNewBaseIE):
+ _SHOW_VALID_URL = r'''(?x)
+ (?P<base_url>
+ https?://
+ (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
+ [^/?#&]+-(?P<show_id>\d+)
+ )
+ '''
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowNewIE.suitable(url)
+ else super(TVNowListBaseIE, cls).suitable(url))
+
+ def _extract_items(self, url, show_id, list_id, query):
+ items = self._call_api(
+ 'teaserrow/format/episode/' + show_id, list_id,
+ query=query)['items']
+
+ entries = []
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ item_url = urljoin(url, item.get('url'))
+ if not item_url:
+ continue
+ video_id = str_or_none(item.get('id') or item.get('videoId'))
+ item_title = item.get('subheadline') or item.get('text')
+ entries.append(self.url_result(
+ item_url, ie=TVNowNewIE.ie_key(), video_id=video_id,
+ video_title=item_title))
+
+ return self.playlist_result(entries, '%s/%s' % (show_id, list_id))
+
+
+class TVNowSeasonIE(TVNowListBaseIE):
+ _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
+ 'info_dict': {
+ 'id': '1815/13',
+ },
+ 'playlist_mincount': 22,
+ }]
+
+ def _real_extract(self, url):
+ _, show_id, season_id = re.match(self._VALID_URL, url).groups()
+ return self._extract_items(
+ url, show_id, season_id, {'season': season_id})
+
+
+class TVNowAnnualIE(TVNowListBaseIE):
+ _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
+ _TESTS = [{
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
+ 'info_dict': {
+ 'id': '1669/2017-05',
+ },
+ 'playlist_mincount': 2,
+ }]
+
+ def _real_extract(self, url):
+ _, show_id, year, month = re.match(self._VALID_URL, url).groups()
+ return self._extract_items(
+ url, show_id, '%s-%s' % (year, month), {
+ 'year': int(year),
+ 'month': int(month),
+ })
+
+
+class TVNowShowIE(TVNowListBaseIE):
+ _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
+ _TESTS = [{
+ # annual navigationType
+ 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
+ 'info_dict': {
+ 'id': '1669',
+ },
+ 'playlist_mincount': 73,
+ }, {
+ # season navigationType
+ 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
+ 'info_dict': {
+ 'id': '11471',
+ },
+ 'playlist_mincount': 3,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if TVNowNewIE.suitable(url) or TVNowSeasonIE.suitable(url) or TVNowAnnualIE.suitable(url)
+ else super(TVNowShowIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ base_url, show_id = re.match(self._VALID_URL, url).groups()
+
+ result = self._call_api(
+ 'teaserrow/format/navigation/' + show_id, show_id)
+
+ items = result['items']
+
+ entries = []
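+ # Shows are paginated either by year/month ('annual') or by season;
+ # emit one playlist entry per month or per season accordingly.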
+ navigation = result.get('navigationType')
+ if navigation == 'annual':
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ year = int_or_none(item.get('year'))
+ if year is None:
+ continue
+ months = item.get('months')
+ if not isinstance(months, list):
+ continue
+ for month_dict in months:
+ if not isinstance(month_dict, dict) or not month_dict:
+ continue
+ month_number = int_or_none(list(month_dict.keys())[0])
+ if month_number is None:
+ continue
+ entries.append(self.url_result(
+ '%s/%04d-%02d' % (base_url, year, month_number),
+ ie=TVNowAnnualIE.ie_key()))
+ elif navigation == 'season':
+ for item in items:
+ if not isinstance(item, dict):
+ continue
+ season_number = int_or_none(item.get('season'))
+ if season_number is None:
+ continue
+ entries.append(self.url_result(
+ '%s/staffel-%d' % (base_url, season_number),
+ ie=TVNowSeasonIE.ie_key()))
+ else:
+ raise ExtractorError('Unknown navigationType')
+
+ return self.playlist_result(entries, show_id)
diff --git a/youtube_dl/extractor/tvp.py b/youtube_dlc/extractor/tvp.py
index accff75b5..accff75b5 100644
--- a/youtube_dl/extractor/tvp.py
+++ b/youtube_dlc/extractor/tvp.py
diff --git a/youtube_dlc/extractor/tvplay.py b/youtube_dlc/extractor/tvplay.py
new file mode 100644
index 000000000..3c2450dd0
--- /dev/null
+++ b/youtube_dlc/extractor/tvplay.py
@@ -0,0 +1,512 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ parse_iso8601,
+ qualities,
+ try_get,
+ update_url_query,
+ url_or_none,
+)
+
+
+class TVPlayIE(InfoExtractor):
+ IE_NAME = 'mtg'
+ IE_DESC = 'MTG services'
+ _VALID_URL = r'''(?x)
+ (?:
+ mtg:|
+ https?://
+ (?:www\.)?
+ (?:
+ tvplay(?:\.skaties)?\.lv(?:/parraides)?|
+ (?:tv3play|play\.tv3)\.lt(?:/programos)?|
+ tv3play(?:\.tv3)?\.ee/sisu|
+ (?:tv(?:3|6|8|10)play|viafree)\.se/program|
+ (?:(?:tv3play|viasat4play|tv6play|viafree)\.no|(?:tv3play|viafree)\.dk)/programmer|
+ play\.nova(?:tv)?\.bg/programi
+ )
+ /(?:[^/]+/)+
+ )
+ (?P<id>\d+)
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://www.tvplay.lv/parraides/vinas-melo-labak/418113?autostart=true',
+ 'md5': 'a1612fe0849455423ad8718fe049be21',
+ 'info_dict': {
+ 'id': '418113',
+ 'ext': 'mp4',
+ 'title': 'Kādi ir īri? - Viņas melo labāk',
+ 'description': 'Baiba apsmej īrus, kādi tie ir un ko viņi dara.',
+ 'series': 'Viņas melo labāk',
+ 'season': '2.sezona',
+ 'season_number': 2,
+ 'duration': 25,
+ 'timestamp': 1406097056,
+ 'upload_date': '20140723',
+ },
+ },
+ {
+ 'url': 'http://play.tv3.lt/programos/moterys-meluoja-geriau/409229?autostart=true',
+ 'info_dict': {
+ 'id': '409229',
+ 'ext': 'flv',
+ 'title': 'Moterys meluoja geriau',
+ 'description': 'md5:9aec0fc68e2cbc992d2a140bd41fa89e',
+ 'series': 'Moterys meluoja geriau',
+ 'episode_number': 47,
+ 'season': '1 sezonas',
+ 'season_number': 1,
+ 'duration': 1330,
+ 'timestamp': 1403769181,
+ 'upload_date': '20140626',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv3play.ee/sisu/kodu-keset-linna/238551?autostart=true',
+ 'info_dict': {
+ 'id': '238551',
+ 'ext': 'flv',
+ 'title': 'Kodu keset linna 398537',
+ 'description': 'md5:7df175e3c94db9e47c0d81ffa5d68701',
+ 'duration': 1257,
+ 'timestamp': 1292449761,
+ 'upload_date': '20101215',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv3play.se/program/husraddarna/395385?autostart=true',
+ 'info_dict': {
+ 'id': '395385',
+ 'ext': 'mp4',
+ 'title': 'Husräddarna S02E07',
+ 'description': 'md5:f210c6c89f42d4fc39faa551be813777',
+ 'duration': 2574,
+ 'timestamp': 1400596321,
+ 'upload_date': '20140520',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv6play.se/program/den-sista-dokusapan/266636?autostart=true',
+ 'info_dict': {
+ 'id': '266636',
+ 'ext': 'mp4',
+ 'title': 'Den sista dokusåpan S01E08',
+ 'description': 'md5:295be39c872520221b933830f660b110',
+ 'duration': 1492,
+ 'timestamp': 1330522854,
+ 'upload_date': '20120229',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv8play.se/program/antikjakten/282756?autostart=true',
+ 'info_dict': {
+ 'id': '282756',
+ 'ext': 'mp4',
+ 'title': 'Antikjakten S01E10',
+ 'description': 'md5:1b201169beabd97e20c5ad0ad67b13b8',
+ 'duration': 2646,
+ 'timestamp': 1348575868,
+ 'upload_date': '20120925',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv3play.no/programmer/anna-anka-soker-assistent/230898?autostart=true',
+ 'info_dict': {
+ 'id': '230898',
+ 'ext': 'mp4',
+ 'title': 'Anna Anka søker assistent - Ep. 8',
+ 'description': 'md5:f80916bf5bbe1c5f760d127f8dd71474',
+ 'duration': 2656,
+ 'timestamp': 1277720005,
+ 'upload_date': '20100628',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.viasat4play.no/programmer/budbringerne/21873?autostart=true',
+ 'info_dict': {
+ 'id': '21873',
+ 'ext': 'mp4',
+ 'title': 'Budbringerne program 10',
+ 'description': 'md5:4db78dc4ec8a85bb04fd322a3ee5092d',
+ 'duration': 1297,
+ 'timestamp': 1254205102,
+ 'upload_date': '20090929',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://www.tv6play.no/programmer/hotelinspektor-alex-polizzi/361883?autostart=true',
+ 'info_dict': {
+ 'id': '361883',
+ 'ext': 'mp4',
+ 'title': 'Hotelinspektør Alex Polizzi - Ep. 10',
+ 'description': 'md5:3ecf808db9ec96c862c8ecb3a7fdaf81',
+ 'duration': 2594,
+ 'timestamp': 1393236292,
+ 'upload_date': '20140224',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'http://play.novatv.bg/programi/zdravei-bulgariya/624952?autostart=true',
+ 'info_dict': {
+ 'id': '624952',
+ 'ext': 'flv',
+ 'title': 'Здравей, България (12.06.2015 г.) ',
+ 'description': 'md5:99f3700451ac5bb71a260268b8daefd7',
+ 'duration': 8838,
+ 'timestamp': 1434100372,
+ 'upload_date': '20150612',
+ },
+ 'params': {
+ # rtmp download
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://play.nova.bg/programi/zdravei-bulgariya/764300?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://tvplay.skaties.lv/parraides/vinas-melo-labak/418113?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/418113/?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ # views is null
+ 'url': 'http://tvplay.skaties.lv/parraides/tv3-zinas/760183',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://tv3play.tv3.ee/sisu/kodu-keset-linna/238551?autostart=true',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'mtg:418113',
+ 'only_matching': True,
+ }
+ ]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ geo_country = self._search_regex(
+ r'https?://[^/]+\.([a-z]{2})', url,
+ 'geo country', default=None)
+ if geo_country:
+ self._initialize_geo_bypass({'countries': [geo_country.upper()]})
+ video = self._download_json(
+ 'http://playapi.mtgx.tv/v3/videos/%s' % video_id, video_id, 'Downloading video JSON')
+
+ title = video['title']
+
+ try:
+ streams = self._download_json(
+ 'http://playapi.mtgx.tv/v3/videos/stream/%s' % video_id,
+ video_id, 'Downloading streams JSON')
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ msg = self._parse_json(e.cause.read().decode('utf-8'), video_id)
+ raise ExtractorError(msg['msg'], expected=True)
+ raise
+
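+ # Stream keys rank from worst to best: plain 'hls', then 'medium',
+ # then 'high'.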
+ quality = qualities(['hls', 'medium', 'high'])
+ formats = []
+ for format_id, video_url in streams.get('streams', {}).items():
+ video_url = url_or_none(video_url)
+ if not video_url:
+ continue
+ ext = determine_ext(video_url)
+ if ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(video_url, {
+ 'hdcore': '3.5.0',
+ 'plugin': 'aasp-3.5.0.151.81'
+ }), video_id, f4m_id='hds', fatal=False))
+ elif ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ fmt = {
+ 'format_id': format_id,
+ 'quality': quality(format_id),
+ 'ext': ext,
+ }
+ if video_url.startswith('rtmp'):
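+ # Split rtmp://host/app/playpath into the pieces the rtmp downloader
+ # expects.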
+ m = re.search(
+ r'^(?P<url>rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', video_url)
+ if not m:
+ continue
+ fmt.update({
+ 'ext': 'flv',
+ 'url': m.group('url'),
+ 'app': m.group('app'),
+ 'play_path': m.group('playpath'),
+ 'preference': -1,
+ })
+ else:
+ fmt.update({
+ 'url': video_url,
+ })
+ formats.append(fmt)
+
+ if not formats and video.get('is_geo_blocked'):
+ self.raise_geo_restricted(
+ 'This content might not be available in your country due to copyright reasons')
+
+ self._sort_formats(formats)
+
+ # TODO: webvtt in m3u8
+ subtitles = {}
+ sami_path = video.get('sami_path')
+ if sami_path:
+ lang = self._search_regex(
+ r'_([a-z]{2})\.xml', sami_path, 'lang',
+ default=compat_urlparse.urlparse(url).netloc.rsplit('.', 1)[-1])
+ subtitles[lang] = [{
+ 'url': sami_path,
+ }]
+
+ series = video.get('format_title')
+ episode_number = int_or_none(video.get('format_position', {}).get('episode'))
+ season = video.get('_embedded', {}).get('season', {}).get('title')
+ season_number = int_or_none(video.get('format_position', {}).get('season'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'series': series,
+ 'episode_number': episode_number,
+ 'season': season,
+ 'season_number': season_number,
+ 'duration': int_or_none(video.get('duration')),
+ 'timestamp': parse_iso8601(video.get('created_at')),
+ 'view_count': try_get(video, lambda x: x['views']['total'], int),
+ 'age_limit': int_or_none(video.get('age_limit', 0)),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
+
+
+class ViafreeIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:www\.)?
+ viafree\.(?P<country>dk|no|se)
+ /(?P<id>program(?:mer)?/(?:[^/]+/)+[^/?#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
+ 'info_dict': {
+ 'id': '757786',
+ 'ext': 'mp4',
+ 'title': 'Det beste vorspielet - Sesong 2 - Episode 1',
+ 'description': 'md5:b632cb848331404ccacd8cd03e83b4c3',
+ 'series': 'Det beste vorspielet',
+ 'season_number': 2,
+ 'duration': 1116,
+ 'timestamp': 1471200600,
+ 'upload_date': '20160814',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # with relatedClips
+ 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-1',
+ 'only_matching': True,
+ }, {
+ # Different og:image URL schema
+ 'url': 'http://www.viafree.se/program/reality/sommaren-med-youtube-stjarnorna/sasong-1/avsnitt-2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viafree.se/program/livsstil/husraddarna/sasong-2/avsnitt-2',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viafree.dk/programmer/reality/paradise-hotel/saeson-7/episode-5',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ @classmethod
+ def suitable(cls, url):
+ return False if TVPlayIE.suitable(url) else super(ViafreeIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ country, path = re.match(self._VALID_URL, url).groups()
+ content = self._download_json(
+ 'https://viafree-content.mtg-api.com/viafree-content/v1/%s/path/%s' % (country, path), path)
+ program = content['_embedded']['viafreeBlocks'][0]['_embedded']['program']
+ guid = program['guid']
+ meta = content['meta']
+ title = meta['title']
+
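+ # The program's streamLink resolves to a prioritized stream list; a 403
+ # here is treated as a geo block for the site's country.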
+ try:
+ stream_href = self._download_json(
+ program['_links']['streamLink']['href'], guid,
+ headers=self.geo_verification_headers())['embedded']['prioritizedStreams'][0]['links']['stream']['href']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self.raise_geo_restricted(countries=[country])
+ raise
+
+ formats = self._extract_m3u8_formats(stream_href, guid, 'mp4')
+ self._sort_formats(formats)
+ episode = program.get('episode') or {}
+
+ return {
+ 'id': guid,
+ 'title': title,
+ 'thumbnail': meta.get('image'),
+ 'description': meta.get('description'),
+ 'series': episode.get('seriesTitle'),
+ 'episode_number': int_or_none(episode.get('episodeNumber')),
+ 'season_number': int_or_none(episode.get('seasonNumber')),
+ 'duration': int_or_none(try_get(program, lambda x: x['video']['duration']['milliseconds']), 1000),
+ 'timestamp': parse_iso8601(try_get(program, lambda x: x['availability']['start'])),
+ 'formats': formats,
+ }
+
+
+class TVPlayHomeIE(InfoExtractor):
+ _VALID_URL = r'https?://tvplay\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/[^/]+/[^/?#&]+-(?P<id>\d+)'
+ _TESTS = [{
+ 'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
+ 'info_dict': {
+ 'id': '366367',
+ 'ext': 'mp4',
+ 'title': 'Aferistai',
+ 'description': 'Aferistai. Kalėdinė pasaka.',
+ 'series': 'Aferistai [N-7]',
+ 'season': '1 sezonas',
+ 'season_number': 1,
+ 'duration': 464,
+ 'timestamp': 1394209658,
+ 'upload_date': '20140307',
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [TVPlayIE.ie_key()],
+ }, {
+ 'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_id = self._search_regex(
+ r'data-asset-id\s*=\s*["\'](\d{5,})\b', webpage, 'video id')
+
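+ # Asset ids shorter than 8 digits belong to the old MTG catalogue and
+ # are delegated to the mtg: extractor; longer ids are played straight
+ # from the page.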
+ if len(video_id) < 8:
+ return self.url_result(
+ 'mtg:%s' % video_id, ie=TVPlayIE.ie_key(), video_id=video_id)
+
+ m3u8_url = self._search_regex(
+ r'data-file\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'm3u8 url', group='url')
+
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls')
+ self._sort_formats(formats)
+
+ title = self._search_regex(
+ r'data-title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
+ 'title', default=None, group='value') or self._html_search_meta(
+ 'title', webpage, default=None) or self._og_search_title(
+ webpage)
+
+ description = self._html_search_meta(
+ 'description', webpage,
+ default=None) or self._og_search_description(webpage)
+
+ thumbnail = self._search_regex(
+ r'data-image\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'thumbnail', default=None, group='url') or self._html_search_meta(
+ 'thumbnail', webpage, default=None) or self._og_search_thumbnail(
+ webpage)
+
+ duration = int_or_none(self._search_regex(
+ r'data-duration\s*=\s*["\'](\d+)', webpage, 'duration',
+ fatal=False))
+
+ season = self._search_regex(
+ (r'data-series-title\s*=\s*(["\'])[^/]+/(?P<value>(?:(?!\1).)+)\1',
+ r'\bseason\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'season', default=None, group='value')
+ season_number = int_or_none(self._search_regex(
+ r'(\d+)(?:[.\s]+sezona|\s+HOOAEG)', season or '', 'season number',
+ default=None))
+ episode = self._search_regex(
+ (r'\bepisode\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+ r'data-subtitle\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
+ 'episode', default=None, group='value')
+ episode_number = int_or_none(self._search_regex(
+ r'(?:S[eē]rija|Osa)\s+(\d+)', episode or '', 'episode number',
+ default=None))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'season': season,
+ 'season_number': season_number,
+ 'episode': episode,
+ 'episode_number': episode_number,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/tvplayer.py b/youtube_dlc/extractor/tvplayer.py
index 8f8686a65..8f8686a65 100644
--- a/youtube_dl/extractor/tvplayer.py
+++ b/youtube_dlc/extractor/tvplayer.py
diff --git a/youtube_dl/extractor/tweakers.py b/youtube_dlc/extractor/tweakers.py
index 2b10d9bca..2b10d9bca 100644
--- a/youtube_dl/extractor/tweakers.py
+++ b/youtube_dlc/extractor/tweakers.py
diff --git a/youtube_dlc/extractor/twentyfourvideo.py b/youtube_dlc/extractor/twentyfourvideo.py
new file mode 100644
index 000000000..74d14049b
--- /dev/null
+++ b/youtube_dlc/extractor/twentyfourvideo.py
@@ -0,0 +1,133 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_iso8601,
+ int_or_none,
+ xpath_attr,
+ xpath_element,
+)
+
+
+class TwentyFourVideoIE(InfoExtractor):
+ IE_NAME = '24video'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?P<host>
+ (?:(?:www|porno?)\.)?24video\.
+ (?:net|me|xxx|sexy?|tube|adult|site|vip)
+ )/
+ (?:
+ video/(?:(?:view|xml)/)?|
+ player/new24_play\.swf\?id=
+ )
+ (?P<id>\d+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.24video.net/video/view/1044982',
+ 'md5': 'e09fc0901d9eaeedac872f154931deeb',
+ 'info_dict': {
+ 'id': '1044982',
+ 'ext': 'mp4',
+ 'title': 'Эротика каменного века',
+ 'description': 'Как смотрели порно в каменном веке.',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'SUPERTELO',
+ 'duration': 31,
+ 'timestamp': 1275937857,
+ 'upload_date': '20100607',
+ 'age_limit': 18,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ }, {
+ 'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.24video.me/video/view/1044982',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.24video.tube/video/view/2363750',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.24video.site/video/view/2640421',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://porno.24video.net/video/2640421-vsya-takaya-gibkaya-i-v-masle',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.24video.vip/video/view/1044982',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://porn.24video.net/video/2640421-vsya-takay',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ host = mobj.group('host')
+
+ webpage = self._download_webpage(
+ 'http://%s/video/view/%s' % (host, video_id), video_id)
+
+ title = self._og_search_title(webpage)
+ description = self._html_search_regex(
+ r'<(p|span)[^>]+itemprop="description"[^>]*>(?P<description>[^<]+)</\1>',
+ webpage, 'description', fatal=False, group='description')
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = int_or_none(self._og_search_property(
+ 'duration', webpage, 'duration', fatal=False))
+ timestamp = parse_iso8601(self._search_regex(
+ r'<time[^>]+\bdatetime="([^"]+)"[^>]+itemprop="uploadDate"',
+ webpage, 'upload date', fatal=False))
+
+ uploader = self._html_search_regex(
+ r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
+ webpage, 'uploader', fatal=False)
+
+ view_count = int_or_none(self._html_search_regex(
+ r'<span class="video-views">(\d+) просмотр',
+ webpage, 'view count', fatal=False))
+ comment_count = int_or_none(self._html_search_regex(
+ r'<a[^>]+href="#tab-comments"[^>]*>(\d+) комментари',
+ webpage, 'comment count', default=None))
+
+        # The mode=init request sets cookies used by the play request below
+        self._download_xml(
+            'http://%s/video/xml/%s?mode=init' % (host, video_id),
+ video_id, 'Downloading init XML')
+
+ video_xml = self._download_xml(
+ 'http://%s/video/xml/%s?mode=play' % (host, video_id),
+ video_id, 'Downloading video XML')
+
+ video = xpath_element(video_xml, './/video', 'video', fatal=True)
+
+ formats = [{
+ 'url': xpath_attr(video, '', 'url', 'video URL', fatal=True),
+ }]
+
+ like_count = int_or_none(video.get('ratingPlus'))
+ dislike_count = int_or_none(video.get('ratingMinus'))
+ age_limit = 18 if video.get('adult') == 'true' else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/twentymin.py b/youtube_dlc/extractor/twentymin.py
index a42977f39..a42977f39 100644
--- a/youtube_dl/extractor/twentymin.py
+++ b/youtube_dlc/extractor/twentymin.py
diff --git a/youtube_dl/extractor/twentythreevideo.py b/youtube_dlc/extractor/twentythreevideo.py
index aa0c6e90f..aa0c6e90f 100644
--- a/youtube_dl/extractor/twentythreevideo.py
+++ b/youtube_dlc/extractor/twentythreevideo.py
diff --git a/youtube_dl/extractor/twitcasting.py b/youtube_dlc/extractor/twitcasting.py
index 2dbe89f5b..2dbe89f5b 100644
--- a/youtube_dl/extractor/twitcasting.py
+++ b/youtube_dlc/extractor/twitcasting.py
diff --git a/youtube_dlc/extractor/twitch.py b/youtube_dlc/extractor/twitch.py
new file mode 100644
index 000000000..35e4dda37
--- /dev/null
+++ b/youtube_dlc/extractor/twitch.py
@@ -0,0 +1,802 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import itertools
+import re
+import random
+import json
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_kwargs,
+ compat_parse_qs,
+ compat_str,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ orderedSet,
+ parse_duration,
+ parse_iso8601,
+ qualities,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+
+
+class TwitchBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
+
+ _API_BASE = 'https://api.twitch.tv'
+ _USHER_BASE = 'https://usher.ttvnw.net'
+ _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
+ _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
+ _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
+ _NETRC_MACHINE = 'twitch'
+
+ def _handle_error(self, response):
+ if not isinstance(response, dict):
+ return
+ error = response.get('error')
+ if error:
+ raise ExtractorError(
+ '%s returned error: %s - %s' % (self.IE_NAME, error, response.get('message')),
+ expected=True)
+
+ def _call_api(self, path, item_id, *args, **kwargs):
+ headers = kwargs.get('headers', {}).copy()
+ headers.update({
+ 'Accept': 'application/vnd.twitchtv.v5+json; charset=UTF-8',
+ 'Client-ID': self._CLIENT_ID,
+ })
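+        # Accept 400/410 responses so their JSON error payload is parsed and
+        # surfaced by _handle_error instead of raising a bare HTTP error.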
+ kwargs.update({
+ 'headers': headers,
+ 'expected_status': (400, 410),
+ })
+ response = self._download_json(
+ '%s/%s' % (self._API_BASE, path), item_id,
+ *args, **compat_kwargs(kwargs))
+ self._handle_error(response)
+ return response
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ def fail(message):
+ raise ExtractorError(
+ 'Unable to login. Twitch said: %s' % message, expected=True)
+
+ def login_step(page, urlh, note, data):
+ form = self._hidden_inputs(page)
+ form.update(data)
+
+ page_url = urlh.geturl()
+ post_url = self._search_regex(
+ r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
+ 'post url', default=self._LOGIN_POST_URL, group='url')
+ post_url = urljoin(page_url, post_url)
+
+ headers = {
+ 'Referer': page_url,
+ 'Origin': page_url,
+ 'Content-Type': 'text/plain;charset=UTF-8',
+ }
+
+ response = self._download_json(
+ post_url, None, note, data=json.dumps(form).encode(),
+ headers=headers, expected_status=400)
+ error = response.get('error_description') or response.get('error_code')
+ if error:
+ fail(error)
+
+ if 'Authenticated successfully' in response.get('message', ''):
+ return None, None
+
+ redirect_url = urljoin(
+ post_url,
+ response.get('redirect') or response['redirect_path'])
+ return self._download_webpage_handle(
+ redirect_url, None, 'Downloading login redirect page',
+ headers=headers)
+
+ login_page, handle = self._download_webpage_handle(
+ self._LOGIN_FORM_URL, None, 'Downloading login page')
+
+ # Some TOR nodes and public proxies are blocked completely
+ if 'blacklist_message' in login_page:
+ fail(clean_html(login_page))
+
+ redirect_page, handle = login_step(
+ login_page, handle, 'Logging in', {
+ 'username': username,
+ 'password': password,
+ 'client_id': self._CLIENT_ID,
+ })
+
+ # Successful login
+ if not redirect_page:
+ return
+
+ if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
+ # TODO: Add mechanism to request an SMS or phone call
+ tfa_token = self._get_tfa_info('two-factor authentication token')
+ login_step(redirect_page, handle, 'Submitting TFA token', {
+ 'authy_token': tfa_token,
+ 'remember_2fa': 'true',
+ })
+
+ def _prefer_source(self, formats):
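+        # Promote the original-quality rendition: the "Source" format if present,
+        # otherwise any format served from a /chunked/ URL.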
+ try:
+ source = next(f for f in formats if f['format_id'] == 'Source')
+ source['quality'] = 10
+ except StopIteration:
+ for f in formats:
+ if '/chunked/' in f['url']:
+ f.update({
+ 'quality': 10,
+ 'format_note': 'Source',
+ })
+ self._sort_formats(formats)
+
+
+class TwitchItemBaseIE(TwitchBaseIE):
+ def _download_info(self, item, item_id):
+ return self._extract_info(self._call_api(
+ 'kraken/videos/%s%s' % (item, item_id), item_id,
+ 'Downloading %s info JSON' % self._ITEM_TYPE))
+
+ def _extract_media(self, item_id):
+ info = self._download_info(self._ITEM_SHORTCUT, item_id)
+ response = self._call_api(
+ 'api/videos/%s%s' % (self._ITEM_SHORTCUT, item_id), item_id,
+ 'Downloading %s playlist JSON' % self._ITEM_TYPE)
+ entries = []
+ chunks = response['chunks']
+ qualities = list(chunks.keys())
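+        # chunks maps each quality to a list of fragments; zip pairs the Nth
+        # fragment of every quality so each part gets a full set of formats.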
+ for num, fragment in enumerate(zip(*chunks.values()), start=1):
+ formats = []
+ for fmt_num, fragment_fmt in enumerate(fragment):
+ format_id = qualities[fmt_num]
+ fmt = {
+ 'url': fragment_fmt['url'],
+ 'format_id': format_id,
+ 'quality': 1 if format_id == 'live' else 0,
+ }
+ m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
+ if m:
+ fmt['height'] = int(m.group('height'))
+ formats.append(fmt)
+ self._sort_formats(formats)
+ entry = dict(info)
+ entry['id'] = '%s_%d' % (entry['id'], num)
+ entry['title'] = '%s part %d' % (entry['title'], num)
+ entry['formats'] = formats
+ entries.append(entry)
+ return self.playlist_result(entries, info['id'], info['title'])
+
+ def _extract_info(self, info):
+ status = info.get('status')
+ if status == 'recording':
+ is_live = True
+ elif status == 'recorded':
+ is_live = False
+ else:
+ is_live = None
+ _QUALITIES = ('small', 'medium', 'large')
+ quality_key = qualities(_QUALITIES)
+ thumbnails = []
+ preview = info.get('preview')
+ if isinstance(preview, dict):
+ for thumbnail_id, thumbnail_url in preview.items():
+ thumbnail_url = url_or_none(thumbnail_url)
+ if not thumbnail_url:
+ continue
+ if thumbnail_id not in _QUALITIES:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'preference': quality_key(thumbnail_id),
+ })
+ return {
+ 'id': info['_id'],
+ 'title': info.get('title') or 'Untitled Broadcast',
+ 'description': info.get('description'),
+ 'duration': int_or_none(info.get('length')),
+ 'thumbnails': thumbnails,
+ 'uploader': info.get('channel', {}).get('display_name'),
+ 'uploader_id': info.get('channel', {}).get('name'),
+ 'timestamp': parse_iso8601(info.get('recorded_at')),
+ 'view_count': int_or_none(info.get('views')),
+ 'is_live': is_live,
+ }
+
+ def _real_extract(self, url):
+ return self._extract_media(self._match_id(url))
+
+
+class TwitchVideoIE(TwitchItemBaseIE):
+ IE_NAME = 'twitch:video'
+ _VALID_URL = r'%s/[^/]+/b/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
+ _ITEM_TYPE = 'video'
+ _ITEM_SHORTCUT = 'a'
+
+ _TEST = {
+ 'url': 'http://www.twitch.tv/riotgames/b/577357806',
+ 'info_dict': {
+ 'id': 'a577357806',
+ 'title': 'Worlds Semifinals - Star Horn Royal Club vs. OMG',
+ },
+ 'playlist_mincount': 12,
+ 'skip': 'HTTP Error 404: Not Found',
+ }
+
+
+class TwitchChapterIE(TwitchItemBaseIE):
+ IE_NAME = 'twitch:chapter'
+ _VALID_URL = r'%s/[^/]+/c/(?P<id>\d+)' % TwitchBaseIE._VALID_URL_BASE
+ _ITEM_TYPE = 'chapter'
+ _ITEM_SHORTCUT = 'c'
+
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/acracingleague/c/5285812',
+ 'info_dict': {
+ 'id': 'c5285812',
+ 'title': 'ACRL Off Season - Sports Cars @ Nordschleife',
+ },
+ 'playlist_mincount': 3,
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://www.twitch.tv/tsm_theoddone/c/2349361',
+ 'only_matching': True,
+ }]
+
+
+class TwitchVodIE(TwitchItemBaseIE):
+ IE_NAME = 'twitch:vod'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
+ player\.twitch\.tv/\?.*?\bvideo=v?
+ )
+ (?P<id>\d+)
+ '''
+ _ITEM_TYPE = 'vod'
+ _ITEM_SHORTCUT = 'v'
+
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
+ 'info_dict': {
+ 'id': 'v6528877',
+ 'ext': 'mp4',
+ 'title': 'LCK Summer Split - Week 6 Day 1',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 17208,
+ 'timestamp': 1435131709,
+ 'upload_date': '20150624',
+ 'uploader': 'Riot Games',
+ 'uploader_id': 'riotgames',
+ 'view_count': int,
+ 'start_time': 310,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # Untitled broadcast (title is None)
+ 'url': 'http://www.twitch.tv/belkao_o/v/11230755',
+ 'info_dict': {
+ 'id': 'v11230755',
+ 'ext': 'mp4',
+ 'title': 'Untitled Broadcast',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 1638,
+ 'timestamp': 1439746708,
+ 'upload_date': '20150816',
+ 'uploader': 'BelkAO_o',
+ 'uploader_id': 'belkao_o',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ 'skip': 'HTTP Error 404: Not Found',
+ }, {
+ 'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/videos/6528877',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/northernlion/video/291940395',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.twitch.tv/?video=480452374',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ item_id = self._match_id(url)
+
+ info = self._download_info(self._ITEM_SHORTCUT, item_id)
+ access_token = self._call_api(
+ 'api/vods/%s/access_token' % item_id, item_id,
+ 'Downloading %s access token' % self._ITEM_TYPE)
+
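+        # The usher playlist URL must be signed with the nauth token/sig pair
+        # obtained from the access token API above.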
+ formats = self._extract_m3u8_formats(
+ '%s/vod/%s.m3u8?%s' % (
+ self._USHER_BASE, item_id,
+ compat_urllib_parse_urlencode({
+ 'allow_source': 'true',
+ 'allow_audio_only': 'true',
+ 'allow_spectre': 'true',
+ 'player': 'twitchweb',
+ 'playlist_include_framerate': 'true',
+ 'nauth': access_token['token'],
+ 'nauthsig': access_token['sig'],
+ })),
+ item_id, 'mp4', entry_protocol='m3u8_native')
+
+ self._prefer_source(formats)
+ info['formats'] = formats
+
+ parsed_url = compat_urllib_parse_urlparse(url)
+ query = compat_parse_qs(parsed_url.query)
+ if 't' in query:
+ info['start_time'] = parse_duration(query['t'][0])
+
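+        # When the VOD has a timestamp, expose the chat replay (rechat) JSON
+        # as a subtitle track.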
+ if info.get('timestamp') is not None:
+ info['subtitles'] = {
+ 'rechat': [{
+ 'url': update_url_query(
+ 'https://api.twitch.tv/v5/videos/%s/comments' % item_id, {
+ 'client_id': self._CLIENT_ID,
+ }),
+ 'ext': 'json',
+ }],
+ }
+
+ return info
+
+
+class TwitchPlaylistBaseIE(TwitchBaseIE):
+ _PLAYLIST_PATH = 'kraken/channels/%s/videos/?offset=%d&limit=%d'
+ _PAGE_LIMIT = 100
+
+ def _extract_playlist(self, channel_name):
+ info = self._call_api(
+ 'kraken/users?login=%s' % channel_name,
+ channel_name, 'Downloading channel info JSON')
+ info = info['users'][0]
+ channel_id = info['_id']
+ channel_name = info.get('display_name') or info.get('name') or channel_name
+ entries = []
+ offset = 0
+ limit = self._PAGE_LIMIT
+ broken_paging_detected = False
+ counter_override = None
+ for counter in itertools.count(1):
+ response = self._call_api(
+ self._PLAYLIST_PATH % (channel_id, offset, limit),
+ channel_id,
+ 'Downloading %s JSON page %s'
+ % (self._PLAYLIST_TYPE, counter_override or counter))
+ page_entries = self._extract_playlist_page(response)
+ if not page_entries:
+ break
+ total = int_or_none(response.get('_total'))
+            # Since early March 2016 Twitch's paging mechanism has been broken
+            # on the Twitch side: the limit is ignored and all videos up to
+            # the offset are returned at once. Work around this by requesting
+            # all videos in a single call.
+            # Update: Twitch fixed the pagination bug on 2016-03-15.
+ if not broken_paging_detected and total and len(page_entries) > limit:
+ self.report_warning(
+ 'Twitch pagination is broken on twitch side, requesting all videos at once',
+ channel_id)
+ broken_paging_detected = True
+ offset = total
+ counter_override = '(all at once)'
+ continue
+ entries.extend(page_entries)
+            if broken_paging_detected or (total and len(page_entries) >= total):
+ break
+ offset += limit
+ return self.playlist_result(
+ [self._make_url_result(entry) for entry in orderedSet(entries)],
+ channel_id, channel_name)
+
+ def _make_url_result(self, url):
+ try:
+ video_id = 'v%s' % TwitchVodIE._match_id(url)
+ return self.url_result(url, TwitchVodIE.ie_key(), video_id=video_id)
+ except AssertionError:
+ return self.url_result(url)
+
+ def _extract_playlist_page(self, response):
+ videos = response.get('videos')
+ return [video['url'] for video in videos] if videos else []
+
+ def _real_extract(self, url):
+ return self._extract_playlist(self._match_id(url))
+
+
+class TwitchProfileIE(TwitchPlaylistBaseIE):
+ IE_NAME = 'twitch:profile'
+ _VALID_URL = r'%s/(?P<id>[^/]+)/profile/?(?:\#.*)?$' % TwitchBaseIE._VALID_URL_BASE
+ _PLAYLIST_TYPE = 'profile'
+
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/vanillatv/profile',
+ 'info_dict': {
+ 'id': '22744919',
+ 'title': 'VanillaTV',
+ },
+ 'playlist_mincount': 412,
+ }, {
+ 'url': 'http://m.twitch.tv/vanillatv/profile',
+ 'only_matching': True,
+ }]
+
+
+class TwitchVideosBaseIE(TwitchPlaylistBaseIE):
+ _VALID_URL_VIDEOS_BASE = r'%s/(?P<id>[^/]+)/videos' % TwitchBaseIE._VALID_URL_BASE
+ _VALID_URL_VIDEOS_FILTERS = r'\?(?:.*?[&;])??filter=%s'
+ _PLAYLIST_PATH = TwitchPlaylistBaseIE._PLAYLIST_PATH + '&broadcast_type='
+
+
+class TwitchAllVideosIE(TwitchVideosBaseIE):
+ IE_NAME = 'twitch:videos:all'
+ _VALID_URL = '%s/?(?:(?:%s)|$)' % (
+ TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE,
+ TwitchVideosBaseIE._VALID_URL_VIDEOS_FILTERS % 'all'
+ )
+ _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive,upload,highlight'
+ _PLAYLIST_TYPE = 'all videos'
+
+ _TESTS = [{
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=time',
+ 'info_dict': {
+ 'id': '497952',
+ 'title': 'Spamfish',
+ },
+ 'playlist_mincount': 869,
+ }, {
+ 'url': 'https://m.twitch.tv/spamfish/videos/',
+ 'only_matching': True,
+ }]
+
+
+class TwitchUploadsIE(TwitchVideosBaseIE):
+ IE_NAME = 'twitch:videos:uploads'
+ _VALID_URL = '%s/?(?:%s)' % (
+ TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE,
+ TwitchVideosBaseIE._VALID_URL_VIDEOS_FILTERS % 'uploads'
+ )
+ _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'upload'
+ _PLAYLIST_TYPE = 'uploads'
+
+ _TESTS = [{
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=uploads&sort=time',
+ 'info_dict': {
+ 'id': '497952',
+ 'title': 'Spamfish',
+ },
+ 'playlist_mincount': 0,
+ }]
+
+
+class TwitchPastBroadcastsIE(TwitchVideosBaseIE):
+ IE_NAME = 'twitch:videos:past-broadcasts'
+ _VALID_URL = '%s/?(?:%s)' % (
+ TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE,
+ TwitchVideosBaseIE._VALID_URL_VIDEOS_FILTERS % 'archives'
+ )
+ _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'archive'
+ _PLAYLIST_TYPE = 'past broadcasts'
+
+ _TESTS = [{
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=archives&sort=time',
+ 'info_dict': {
+ 'id': '497952',
+ 'title': 'Spamfish',
+ },
+ 'playlist_mincount': 0,
+ }]
+
+
+class TwitchHighlightsIE(TwitchVideosBaseIE):
+ IE_NAME = 'twitch:videos:highlights'
+ _VALID_URL = '%s/?(?:%s)' % (
+ TwitchVideosBaseIE._VALID_URL_VIDEOS_BASE,
+ TwitchVideosBaseIE._VALID_URL_VIDEOS_FILTERS % 'highlights'
+ )
+ _PLAYLIST_PATH = TwitchVideosBaseIE._PLAYLIST_PATH + 'highlight'
+ _PLAYLIST_TYPE = 'highlights'
+
+ _TESTS = [{
+ 'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights&sort=views',
+ 'info_dict': {
+ 'id': '497952',
+ 'title': 'Spamfish',
+ },
+ 'playlist_mincount': 805,
+ }]
+
+
+class TwitchStreamIE(TwitchBaseIE):
+ IE_NAME = 'twitch:stream'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:(?:www|go|m)\.)?twitch\.tv/|
+ player\.twitch\.tv/\?.*?\bchannel=
+ )
+ (?P<id>[^/#?]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'http://www.twitch.tv/shroomztv',
+ 'info_dict': {
+ 'id': '12772022048',
+ 'display_id': 'shroomztv',
+ 'ext': 'mp4',
+ 'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ 'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
+ 'is_live': True,
+ 'timestamp': 1421928037,
+ 'upload_date': '20150122',
+ 'uploader': 'ShroomzTV',
+ 'uploader_id': 'shroomztv',
+ 'view_count': int,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.twitch.tv/miracle_doto#profile-0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://player.twitch.tv/?channel=lotsofs',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.twitch.tv/food',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/food',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False
+ if any(ie.suitable(url) for ie in (
+ TwitchVideoIE,
+ TwitchChapterIE,
+ TwitchVodIE,
+ TwitchProfileIE,
+ TwitchAllVideosIE,
+ TwitchUploadsIE,
+ TwitchPastBroadcastsIE,
+ TwitchHighlightsIE,
+ TwitchClipsIE))
+ else super(TwitchStreamIE, cls).suitable(url))
+
+ def _real_extract(self, url):
+ channel_name = self._match_id(url)
+
+ access_token = self._call_api(
+ 'api/channels/%s/access_token' % channel_name, channel_name,
+ 'Downloading access token JSON')
+
+ token = access_token['token']
+ channel_id = compat_str(self._parse_json(
+ token, channel_name)['channel_id'])
+
+ stream = self._call_api(
+ 'kraken/streams/%s?stream_type=all' % channel_id,
+ channel_id, 'Downloading stream JSON').get('stream')
+
+ if not stream:
+ raise ExtractorError('%s is offline' % channel_id, expected=True)
+
+        # The channel name in the URL may be typed in a different case than the
+        # original channel name (e.g. http://www.twitch.tv/TWITCHPLAYSPOKEMON),
+        # which would lead to constructing an invalid m3u8 URL. Work around this
+        # by using the original channel name from the stream JSON, falling back
+        # to the lowercased name when it's unavailable.
+ channel_name = try_get(
+ stream, lambda x: x['channel']['name'],
+ compat_str) or channel_name.lower()
+
+ query = {
+ 'allow_source': 'true',
+ 'allow_audio_only': 'true',
+ 'allow_spectre': 'true',
+ 'p': random.randint(1000000, 10000000),
+ 'player': 'twitchweb',
+ 'playlist_include_framerate': 'true',
+ 'segment_preference': '4',
+ 'sig': access_token['sig'].encode('utf-8'),
+ 'token': token.encode('utf-8'),
+ }
+ formats = self._extract_m3u8_formats(
+ '%s/api/channel/hls/%s.m3u8?%s'
+ % (self._USHER_BASE, channel_name, compat_urllib_parse_urlencode(query)),
+ channel_id, 'mp4')
+ self._prefer_source(formats)
+
+ view_count = stream.get('viewers')
+ timestamp = parse_iso8601(stream.get('created_at'))
+
+ channel = stream['channel']
+ title = self._live_title(channel.get('display_name') or channel.get('name'))
+ description = channel.get('status')
+
+ thumbnails = []
+ for thumbnail_key, thumbnail_url in stream['preview'].items():
+ m = re.search(r'(?P<width>\d+)x(?P<height>\d+)\.jpg$', thumbnail_key)
+ if not m:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+
+ return {
+ 'id': str_or_none(stream.get('_id')) or channel_id,
+ 'display_id': channel_name,
+ 'title': title,
+ 'description': description,
+ 'thumbnails': thumbnails,
+ 'uploader': channel.get('display_name'),
+ 'uploader_id': channel.get('name'),
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'formats': formats,
+ 'is_live': True,
+ }
+
+
+class TwitchClipsIE(TwitchBaseIE):
+ IE_NAME = 'twitch:clips'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
+ (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
+ )
+ (?P<id>[^/?#&]+)
+ '''
+
+ _TESTS = [{
+ 'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
+ 'md5': '761769e1eafce0ffebfb4089cb3847cd',
+ 'info_dict': {
+ 'id': '42850523',
+ 'ext': 'mp4',
+ 'title': 'EA Play 2016 Live from the Novo Theatre',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'timestamp': 1465767393,
+ 'upload_date': '20160612',
+ 'creator': 'EA',
+ 'uploader': 'stereotype_',
+ 'uploader_id': '43566419',
+ },
+ }, {
+ # multiple formats
+ 'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ clip = self._download_json(
+ 'https://gql.twitch.tv/gql', video_id, data=json.dumps({
+ 'query': '''{
+ clip(slug: "%s") {
+ broadcaster {
+ displayName
+ }
+ createdAt
+ curator {
+ displayName
+ id
+ }
+ durationSeconds
+ id
+ tiny: thumbnailURL(width: 86, height: 45)
+ small: thumbnailURL(width: 260, height: 147)
+ medium: thumbnailURL(width: 480, height: 272)
+ title
+ videoQualities {
+ frameRate
+ quality
+ sourceURL
+ }
+ viewCount
+ }
+}''' % video_id,
+ }).encode(), headers={
+ 'Client-ID': self._CLIENT_ID,
+ })['data']['clip']
+
+ if not clip:
+ raise ExtractorError(
+ 'This clip is no longer available', expected=True)
+
+ formats = []
+ for option in clip.get('videoQualities', []):
+ if not isinstance(option, dict):
+ continue
+ source = url_or_none(option.get('sourceURL'))
+ if not source:
+ continue
+ formats.append({
+ 'url': source,
+ 'format_id': option.get('quality'),
+ 'height': int_or_none(option.get('quality')),
+ 'fps': int_or_none(option.get('frameRate')),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumbnail_id in ('tiny', 'small', 'medium'):
+ thumbnail_url = clip.get(thumbnail_id)
+ if not thumbnail_url:
+ continue
+ thumb = {
+ 'id': thumbnail_id,
+ 'url': thumbnail_url,
+ }
+ mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
+ if mobj:
+ thumb.update({
+ 'height': int(mobj.group(2)),
+ 'width': int(mobj.group(1)),
+ })
+ thumbnails.append(thumb)
+
+ return {
+ 'id': clip.get('id') or video_id,
+ 'title': clip.get('title') or video_id,
+ 'formats': formats,
+ 'duration': int_or_none(clip.get('durationSeconds')),
+            'view_count': int_or_none(clip.get('viewCount')),
+ 'timestamp': unified_timestamp(clip.get('createdAt')),
+ 'thumbnails': thumbnails,
+ 'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
+ 'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
+ 'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
+ }
diff --git a/youtube_dlc/extractor/twitter.py b/youtube_dlc/extractor/twitter.py
new file mode 100644
index 000000000..4284487db
--- /dev/null
+++ b/youtube_dlc/extractor/twitter.py
@@ -0,0 +1,610 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_urlparse,
+)
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ try_get,
+ strip_or_none,
+ unified_timestamp,
+ update_url_query,
+ xpath_text,
+)
+
+from .periscope import (
+ PeriscopeBaseIE,
+ PeriscopeIE,
+)
+
+
+class TwitterBaseIE(InfoExtractor):
+ _API_BASE = 'https://api.twitter.com/1.1/'
+ _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?twitter\.com/'
+ _GUEST_TOKEN = None
+
+ def _extract_variant_formats(self, variant, video_id):
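+        # A variant is either an HLS playlist or a progressive HTTP download.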
+ variant_url = variant.get('url')
+ if not variant_url:
+ return []
+ elif '.m3u8' in variant_url:
+ return self._extract_m3u8_formats(
+ variant_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ else:
+ tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
+ f = {
+ 'url': variant_url,
+ 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
+ 'tbr': tbr,
+ }
+ self._search_dimensions_in_video_url(f, variant_url)
+ return [f]
+
+ def _extract_formats_from_vmap_url(self, vmap_url, video_id):
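+        # The VMAP document lists videoVariant nodes plus a fallback MediaFile URL.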
+ vmap_data = self._download_xml(vmap_url, video_id)
+ formats = []
+ urls = []
+ for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
+ video_variant.attrib['url'] = compat_urllib_parse_unquote(
+ video_variant.attrib['url'])
+ urls.append(video_variant.attrib['url'])
+ formats.extend(self._extract_variant_formats(
+ video_variant.attrib, video_id))
+ video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
+ if video_url not in urls:
+ formats.extend(self._extract_variant_formats({'url': video_url}, video_id))
+ return formats
+
+ @staticmethod
+ def _search_dimensions_in_video_url(a_format, video_url):
+ m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
+ if m:
+ a_format.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+
+ def _call_api(self, path, video_id, query={}):
+ headers = {
+ 'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
+ }
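+        # Anonymous API access requires a guest token, fetched once and cached
+        # on the class.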
+ if not self._GUEST_TOKEN:
+ self._GUEST_TOKEN = self._download_json(
+ self._API_BASE + 'guest/activate.json', video_id,
+ 'Downloading guest token', data=b'',
+ headers=headers)['guest_token']
+ headers['x-guest-token'] = self._GUEST_TOKEN
+ try:
+ return self._download_json(
+ self._API_BASE + path, video_id, headers=headers, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(),
+ video_id)['errors'][0]['message'], expected=True)
+ raise
+
+
+class TwitterCardIE(InfoExtractor):
+ IE_NAME = 'twitter:card'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
+ _TESTS = [
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
+            # MD5 checksums vary between download locations, so none is given
+ 'info_dict': {
+ 'id': '560070183650213889',
+ 'ext': 'mp4',
+ 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
+ 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
+ 'uploader': 'Twitter',
+ 'uploader_id': 'Twitter',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 30.033,
+ 'timestamp': 1422366112,
+ 'upload_date': '20150127',
+ },
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
+ 'md5': '7137eca597f72b9abbe61e5ae0161399',
+ 'info_dict': {
+ 'id': '623160978427936768',
+ 'ext': 'mp4',
+ 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
+ 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
+ 'uploader': 'NASA',
+ 'uploader_id': 'NASA',
+ 'timestamp': 1437408129,
+ 'upload_date': '20150720',
+ },
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
+ 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
+ 'info_dict': {
+ 'id': 'dq4Oj5quskI',
+ 'ext': 'mp4',
+ 'title': 'Ubuntu 11.10 Overview',
+ 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
+ 'upload_date': '20111013',
+ 'uploader': 'OMG! UBUNTU!',
+ 'uploader_id': 'omgubuntu',
+ },
+ 'add_ie': ['Youtube'],
+ },
+ {
+ 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
+ 'md5': '6dabeaca9e68cbb71c99c322a4b42a11',
+ 'info_dict': {
+ 'id': 'iBb2x00UVlv',
+ 'ext': 'mp4',
+ 'upload_date': '20151113',
+ 'uploader_id': '1189339351084113920',
+ 'uploader': 'ArsenalTerje',
+ 'title': 'Vine by ArsenalTerje',
+ 'timestamp': 1447451307,
+ },
+ 'add_ie': ['Vine'],
+ }, {
+ 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
+ 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
+ 'info_dict': {
+ 'id': '705235433198714880',
+ 'ext': 'mp4',
+ 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
+ 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
+ 'uploader': 'Brent Yarina',
+ 'uploader_id': 'BTNBrentYarina',
+ 'timestamp': 1456976204,
+ 'upload_date': '20160303',
+ },
+ 'skip': 'This content is no longer available.',
+ }, {
+ 'url': 'https://twitter.com/i/videos/752274308186120192',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
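+        # Card URLs embed the tweet status id; delegate extraction to TwitterIE.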
+ status_id = self._match_id(url)
+ return self.url_result(
+ 'https://twitter.com/statuses/' + status_id,
+ TwitterIE.ie_key(), status_id)
+
+
+class TwitterIE(TwitterBaseIE):
+ IE_NAME = 'twitter'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
+ 'info_dict': {
+ 'id': '643211948184596480',
+ 'ext': 'mp4',
+ 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
+ 'uploader': 'FREE THE NIPPLE',
+ 'uploader_id': 'freethenipple',
+ 'duration': 12.922,
+ 'timestamp': 1442188653,
+ 'upload_date': '20150913',
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
+ 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
+ 'info_dict': {
+ 'id': '657991469417025536',
+ 'ext': 'mp4',
+ 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
+ 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
+ 'thumbnail': r're:^https?://.*\.png',
+ 'uploader': 'Gifs',
+ 'uploader_id': 'giphz',
+ },
+ 'expected_warnings': ['height', 'width'],
+ 'skip': 'Account suspended',
+ }, {
+ 'url': 'https://twitter.com/starwars/status/665052190608723968',
+ 'info_dict': {
+ 'id': '665052190608723968',
+ 'ext': 'mp4',
+ 'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
+ 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
+ 'uploader_id': 'starwars',
+ 'uploader': 'Star Wars',
+ 'timestamp': 1447395772,
+ 'upload_date': '20151113',
+ },
+ }, {
+ 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
+ 'info_dict': {
+ 'id': '705235433198714880',
+ 'ext': 'mp4',
+ 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
+ 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
+ 'uploader_id': 'BTNBrentYarina',
+ 'uploader': 'Brent Yarina',
+ 'timestamp': 1456976204,
+ 'upload_date': '20160303',
+ },
+ 'params': {
+            # The same video as https://twitter.com/i/videos/tweet/705235433198714880,
+            # already covered by the TwitterCardIE test case
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
+ 'info_dict': {
+ 'id': '700207533655363584',
+ 'ext': 'mp4',
+ 'title': 'simon vetugo - BEAT PROD: @suhmeduh #Damndaniel',
+ 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'uploader': 'simon vetugo',
+ 'uploader_id': 'simonvertugo',
+ 'duration': 30.0,
+ 'timestamp': 1455777459,
+ 'upload_date': '20160218',
+ },
+ }, {
+ 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
+ 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
+ 'info_dict': {
+ 'id': 'MIOxnrUteUd',
+ 'ext': 'mp4',
+ 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
+ 'uploader': 'TAKUMA',
+ 'uploader_id': '1004126642786242560',
+ 'timestamp': 1402826626,
+ 'upload_date': '20140615',
+ },
+ 'add_ie': ['Vine'],
+ }, {
+ 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
+ 'info_dict': {
+ 'id': '719944021058060289',
+ 'ext': 'mp4',
+ 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
+ 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
+ 'uploader_id': 'CaptainAmerica',
+ 'uploader': 'Captain America',
+ 'duration': 3.17,
+ 'timestamp': 1460483005,
+ 'upload_date': '20160412',
+ },
+ }, {
+ 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
+ 'info_dict': {
+ 'id': '1zqKVVlkqLaKB',
+ 'ext': 'mp4',
+ 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
+ 'upload_date': '20160923',
+ 'uploader_id': '1PmKqpJdOJQoY',
+ 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
+ 'timestamp': 1474613214,
+ },
+ 'add_ie': ['Periscope'],
+ }, {
+ # has mp4 formats via mobile API
+ 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
+ 'info_dict': {
+ 'id': '852138619213144067',
+ 'ext': 'mp4',
+ 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
+ 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
+ 'uploader': 'عالم الأخبار',
+ 'uploader_id': 'news_al3alm',
+ 'duration': 277.4,
+ 'timestamp': 1492000653,
+ 'upload_date': '20170412',
+ },
+ }, {
+ 'url': 'https://twitter.com/i/web/status/910031516746514432',
+ 'info_dict': {
+ 'id': '910031516746514432',
+ 'ext': 'mp4',
+ 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
+ 'uploader': 'Préfet de Guadeloupe',
+ 'uploader_id': 'Prefet971',
+ 'duration': 47.48,
+ 'timestamp': 1505803395,
+ 'upload_date': '20170919',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ }, {
+ # card via api.twitter.com/1.1/videos/tweet/config
+ 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
+ 'info_dict': {
+ 'id': '1001551623938805763',
+ 'ext': 'mp4',
+ 'title': 're:.*?Shep is on a roll today.*?',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
+ 'uploader': 'Lis Power',
+ 'uploader_id': 'LisPower1',
+ 'duration': 111.278,
+ 'timestamp': 1527623489,
+ 'upload_date': '20180529',
+ },
+ 'params': {
+ 'skip_download': True, # requires ffmpeg
+ },
+ }, {
+ 'url': 'https://twitter.com/foobar/status/1087791357756956680',
+ 'info_dict': {
+ 'id': '1087791357756956680',
+ 'ext': 'mp4',
+ 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
+ 'uploader': 'Twitter',
+ 'uploader_id': 'Twitter',
+ 'duration': 61.567,
+ 'timestamp': 1548184644,
+ 'upload_date': '20190122',
+ },
+ }, {
+ # not available in Periscope
+ 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
+ 'info_dict': {
+ 'id': '1vOGwqejwoWxB',
+ 'ext': 'mp4',
+ 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
+ 'uploader': 'Vivi',
+ 'uploader_id': '1eVjYOLGkGrQL',
+ },
+ 'add_ie': ['TwitterBroadcast'],
+ }, {
+ # Twitch Clip Embed
+ 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
+ 'only_matching': True,
+ }, {
+ # promo_video_website card
+ 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ twid = self._match_id(url)
+ status = self._call_api(
+ 'statuses/show/%s.json' % twid, twid, {
+ 'cards_platform': 'Web-12',
+ 'include_cards': 1,
+ 'include_reply_count': 1,
+ 'include_user_entities': 0,
+ 'tweet_mode': 'extended',
+ })
+
+ title = description = status['full_text'].replace('\n', ' ')
+ # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
+ title = re.sub(r'\s+(https?://[^ ]+)', '', title)
+ user = status.get('user') or {}
+ uploader = user.get('name')
+ if uploader:
+ title = '%s - %s' % (uploader, title)
+ uploader_id = user.get('screen_name')
+
+ tags = []
+ for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
+ hashtag_text = hashtag.get('text')
+ if not hashtag_text:
+ continue
+ tags.append(hashtag_text)
+
+ info = {
+ 'id': twid,
+ 'title': title,
+ 'description': description,
+ 'uploader': uploader,
+ 'timestamp': unified_timestamp(status.get('created_at')),
+ 'uploader_id': uploader_id,
+ 'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None,
+ 'like_count': int_or_none(status.get('favorite_count')),
+ 'repost_count': int_or_none(status.get('retweet_count')),
+ 'comment_count': int_or_none(status.get('reply_count')),
+ 'age_limit': 18 if status.get('possibly_sensitive') else 0,
+ 'tags': tags,
+ }
+
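+        # A tweet carries its video either as native media, as a card, or as a
+        # link to an external page.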
+ media = try_get(status, lambda x: x['extended_entities']['media'][0])
+ if media and media.get('type') != 'photo':
+ video_info = media.get('video_info') or {}
+
+ formats = []
+ for variant in video_info.get('variants', []):
+ formats.extend(self._extract_variant_formats(variant, twid))
+ self._sort_formats(formats)
+
+ thumbnails = []
+ media_url = media.get('media_url_https') or media.get('media_url')
+ if media_url:
+ def add_thumbnail(name, size):
+ thumbnails.append({
+ 'id': name,
+ 'url': update_url_query(media_url, {'name': name}),
+ 'width': int_or_none(size.get('w') or size.get('width')),
+ 'height': int_or_none(size.get('h') or size.get('height')),
+ })
+ for name, size in media.get('sizes', {}).items():
+ add_thumbnail(name, size)
+ add_thumbnail('orig', media.get('original_info') or {})
+
+ info.update({
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(video_info.get('duration_millis'), 1000),
+ })
+ else:
+ card = status.get('card')
+ if card:
+ binding_values = card['binding_values']
+
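+                # Binding values are typed dicts like
+                # {'type': 'STRING', 'string_value': ...}; read the *_value key
+                # matching the declared type.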
+ def get_binding_value(k):
+ o = binding_values.get(k) or {}
+ return try_get(o, lambda x: x[x['type'].lower() + '_value'])
+
+ card_name = card['name'].split(':')[-1]
+ if card_name in ('amplify', 'promo_video_website'):
+ is_amplify = card_name == 'amplify'
+ vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
+ content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
+ formats = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for suffix in ('_small', '', '_large', '_x_large', '_original'):
+ image = get_binding_value('player_image' + suffix) or {}
+ image_url = image.get('url')
+ if not image_url or '/player-placeholder' in image_url:
+ continue
+ thumbnails.append({
+ 'id': suffix[1:] if suffix else 'medium',
+ 'url': image_url,
+ 'width': int_or_none(image.get('width')),
+ 'height': int_or_none(image.get('height')),
+ })
+
+ info.update({
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(get_binding_value(
+ 'content_duration_seconds')),
+ })
+ elif card_name == 'player':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('player_url'),
+ })
+ elif card_name == 'periscope_broadcast':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('url') or get_binding_value('player_url'),
+ 'ie_key': PeriscopeIE.ie_key(),
+ })
+ elif card_name == 'broadcast':
+ info.update({
+ '_type': 'url',
+ 'url': get_binding_value('broadcast_url'),
+ 'ie_key': TwitterBroadcastIE.ie_key(),
+ })
+ else:
+ raise ExtractorError('Unsupported Twitter Card.')
+ else:
+ expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
+ if not expanded_url:
+ raise ExtractorError("There's no video in this tweet.")
+ info.update({
+ '_type': 'url',
+ 'url': expanded_url,
+ })
+ return info
+
+
+class TwitterAmplifyIE(TwitterBaseIE):
+ IE_NAME = 'twitter:amplify'
+ _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
+
+ _TEST = {
+ 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
+ 'md5': '7df102d0b9fd7066b86f3159f8e81bf6',
+ 'info_dict': {
+ 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
+ 'ext': 'mp4',
+ 'title': 'Twitter Video',
+ 'thumbnail': 're:^https?://.*',
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ vmap_url = self._html_search_meta(
+ 'twitter:amplify:vmap', webpage, 'vmap url')
+ formats = self._extract_formats_from_vmap_url(vmap_url, video_id)
+
+ thumbnails = []
+ thumbnail = self._html_search_meta(
+ 'twitter:image:src', webpage, 'thumbnail', fatal=False)
+
+ def _find_dimension(target):
+ w = int_or_none(self._html_search_meta(
+ 'twitter:%s:width' % target, webpage, fatal=False))
+ h = int_or_none(self._html_search_meta(
+ 'twitter:%s:height' % target, webpage, fatal=False))
+ return w, h
+
+ if thumbnail:
+ thumbnail_w, thumbnail_h = _find_dimension('image')
+ thumbnails.append({
+ 'url': thumbnail,
+ 'width': thumbnail_w,
+ 'height': thumbnail_h,
+ })
+
+ video_w, video_h = _find_dimension('player')
+ formats[0].update({
+ 'width': video_w,
+ 'height': video_h,
+ })
+
+ return {
+ 'id': video_id,
+ 'title': 'Twitter Video',
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ }
+
+
+class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
+ IE_NAME = 'twitter:broadcast'
+ _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
+
+ _TEST = {
+ # untitled Periscope video
+ 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
+ 'info_dict': {
+ 'id': '1yNGaQLWpejGj',
+ 'ext': 'mp4',
+ 'title': 'Andrea May Sahouri - Periscope Broadcast',
+ 'uploader': 'Andrea May Sahouri',
+ 'uploader_id': '1PXEdBZWpGwKe',
+ },
+ }
+
+ def _real_extract(self, url):
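+        # Resolve the broadcast's media key, then fetch the live/replay HLS
+        # source for it.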
+ broadcast_id = self._match_id(url)
+ broadcast = self._call_api(
+ 'broadcasts/show.json', broadcast_id,
+ {'ids': broadcast_id})['broadcasts'][broadcast_id]
+ info = self._parse_broadcast_data(broadcast, broadcast_id)
+ media_key = broadcast['media_key']
+ source = self._call_api(
+ 'live_video_stream/status/' + media_key, media_key)['source']
+ m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
+ if '/live_video_stream/geoblocked/' in m3u8_url:
+ self.raise_geo_restricted()
+ m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
+ m3u8_url).query).get('type', [None])[0]
+ state, width, height = self._extract_common_format_info(broadcast)
+ info['formats'] = self._extract_pscp_m3u8_formats(
+ m3u8_url, broadcast_id, m3u8_id, state, width, height)
+ return info
diff --git a/youtube_dlc/extractor/udemy.py b/youtube_dlc/extractor/udemy.py
new file mode 100644
index 000000000..60e364d30
--- /dev/null
+++ b/youtube_dlc/extractor/udemy.py
@@ -0,0 +1,481 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_HTTPError,
+ compat_kwargs,
+ compat_str,
+ compat_urllib_request,
+ compat_urlparse,
+)
+from ..utils import (
+ determine_ext,
+ extract_attributes,
+ ExtractorError,
+ float_or_none,
+ int_or_none,
+ js_to_json,
+ sanitized_Request,
+ try_get,
+ unescapeHTML,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class UdemyIE(InfoExtractor):
+ IE_NAME = 'udemy'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:[^/]+\.)?udemy\.com/
+ (?:
+ [^#]+\#/lecture/|
+ lecture/view/?\?lectureId=|
+ [^/]+/learn/v4/t/lecture/
+ )
+ (?P<id>\d+)
+ '''
+ _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1'
+ _ORIGIN_URL = 'https://www.udemy.com'
+ _NETRC_MACHINE = 'udemy'
+
+ _TESTS = [{
+ 'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757',
+ 'md5': '98eda5b657e752cf945d8445e261b5c5',
+ 'info_dict': {
+ 'id': '160614',
+ 'ext': 'mp4',
+ 'title': 'Introduction and Installation',
+ 'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876',
+ 'duration': 579.29,
+ },
+ 'skip': 'Requires udemy account credentials',
+ }, {
+ # new URL schema
+ 'url': 'https://www.udemy.com/electric-bass-right-from-the-start/learn/v4/t/lecture/4580906',
+ 'only_matching': True,
+ }, {
+ # no url in outputs format entry
+ 'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812',
+ 'only_matching': True,
+ }, {
+ # only outputs rendition
+ 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/#/lecture/172757',
+ 'only_matching': True,
+ }]
+
+ def _extract_course_info(self, webpage, video_id):
+ course = self._parse_json(
+ unescapeHTML(self._search_regex(
+ r'ng-init=["\'].*\bcourse=({.+?})[;"\']',
+ webpage, 'course', default='{}')),
+ video_id, fatal=False) or {}
+ course_id = course.get('id') or self._search_regex(
+ [
+ r'data-course-id=["\'](\d+)',
+ r'&quot;courseId&quot;\s*:\s*(\d+)'
+ ], webpage, 'course id')
+ return course_id, course.get('title')
+
+ def _enroll_course(self, base_url, webpage, course_id):
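+        # Free courses must be enrolled in before lecture JSON is available;
+        # paid courses abort with a pointer to the checkout URL.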
+ def combine_url(base_url, url):
+ return compat_urlparse.urljoin(base_url, url) if not url.startswith('http') else url
+
+ checkout_url = unescapeHTML(self._search_regex(
+ r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/(?:payment|cart)/checkout/.+?)\1',
+ webpage, 'checkout url', group='url', default=None))
+ if checkout_url:
+ raise ExtractorError(
+ 'Course %s is not free. You have to pay for it before you can download. '
+ 'Use this URL to confirm purchase: %s'
+ % (course_id, combine_url(base_url, checkout_url)),
+ expected=True)
+
+ enroll_url = unescapeHTML(self._search_regex(
+ r'href=(["\'])(?P<url>(?:https?://(?:www\.)?udemy\.com)?/course/subscribe/.+?)\1',
+ webpage, 'enroll url', group='url', default=None))
+ if enroll_url:
+ webpage = self._download_webpage(
+ combine_url(base_url, enroll_url),
+ course_id, 'Enrolling in the course',
+ headers={'Referer': base_url})
+ if '>You have enrolled in' in webpage:
+ self.to_screen('%s: Successfully enrolled in the course' % course_id)
+
+ def _download_lecture(self, course_id, lecture_id):
+ return self._download_json(
+ 'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?'
+ % (course_id, lecture_id),
+ lecture_id, 'Downloading lecture JSON', query={
+ 'fields[lecture]': 'title,description,view_html,asset',
+ 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
+ })
+
+ def _handle_error(self, response):
+ if not isinstance(response, dict):
+ return
+ error = response.get('error')
+ if error:
+ error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message'))
+ error_data = error.get('data')
+ if error_data:
+ error_str += ' - %s' % error_data.get('formErrors')
+ raise ExtractorError(error_str, expected=True)
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ headers = kwargs.get('headers', {}).copy()
+ headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
+ kwargs['headers'] = headers
+ ret = super(UdemyIE, self)._download_webpage_handle(
+ *args, **compat_kwargs(kwargs))
+ if not ret:
+ return ret
+ webpage, _ = ret
+ if any(p in webpage for p in (
+ '>Please verify you are a human',
+ 'Access to this page has been denied because we believe you are using automation tools to browse the website',
+ '"_pxCaptcha"')):
+ raise ExtractorError(
+ 'Udemy asks you to solve a CAPTCHA. Login with browser, '
+ 'solve CAPTCHA, then export cookies and pass cookie file to '
+ 'youtube-dlc with --cookies.', expected=True)
+ return ret
+
+ def _download_json(self, url_or_request, *args, **kwargs):
+ headers = {
+ 'X-Udemy-Snail-Case': 'true',
+ 'X-Requested-With': 'XMLHttpRequest',
+ }
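+        # Mirror the client_id/access_token cookies into the auth headers the
+        # Udemy API expects.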
+ for cookie in self._downloader.cookiejar:
+ if cookie.name == 'client_id':
+ headers['X-Udemy-Client-Id'] = cookie.value
+ elif cookie.name == 'access_token':
+ headers['X-Udemy-Bearer-Token'] = cookie.value
+ headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value
+
+ if isinstance(url_or_request, compat_urllib_request.Request):
+ for header, value in headers.items():
+ url_or_request.add_header(header, value)
+ else:
+ url_or_request = sanitized_Request(url_or_request, headers=headers)
+
+ response = super(UdemyIE, self)._download_json(url_or_request, *args, **kwargs)
+ self._handle_error(response)
+ return response
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_popup = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login popup')
+
+ def is_logged(webpage):
+ return any(re.search(p, webpage) for p in (
+ r'href=["\'](?:https://www\.udemy\.com)?/user/logout/',
+ r'>Logout<'))
+
+ # already logged in
+ if is_logged(login_popup):
+ return
+
+ login_form = self._form_hidden_inputs('login-form', login_popup)
+
+ login_form.update({
+ 'email': username,
+ 'password': password,
+ })
+
+ response = self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(login_form),
+ headers={
+ 'Referer': self._ORIGIN_URL,
+ 'Origin': self._ORIGIN_URL,
+ })
+
+ if not is_logged(response):
+ error = self._html_search_regex(
+ r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>',
+ response, 'error message', default=None)
+ if error:
+ raise ExtractorError('Unable to login: %s' % error, expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _real_extract(self, url):
+ lecture_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, lecture_id)
+
+ course_id, _ = self._extract_course_info(webpage, lecture_id)
+
+ try:
+ lecture = self._download_lecture(course_id, lecture_id)
+ except ExtractorError as e:
+ # Error could possibly mean we are not enrolled in the course
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ self._enroll_course(url, webpage, course_id)
+ lecture = self._download_lecture(course_id, lecture_id)
+ else:
+ raise
+
+ title = lecture['title']
+ description = lecture.get('description')
+
+ asset = lecture['asset']
+
+ asset_type = asset.get('asset_type') or asset.get('assetType')
+ if asset_type != 'Video':
+ raise ExtractorError(
+ 'Lecture %s is not a video' % lecture_id, expected=True)
+
+ stream_url = asset.get('stream_url') or asset.get('streamUrl')
+ if stream_url:
+ youtube_url = self._search_regex(
+ r'(https?://www\.youtube\.com/watch\?v=.*)', stream_url, 'youtube URL', default=None)
+ if youtube_url:
+ return self.url_result(youtube_url, 'Youtube')
+
+ video_id = compat_str(asset['id'])
+ thumbnail = asset.get('thumbnail_url') or asset.get('thumbnailUrl')
+ duration = float_or_none(asset.get('data', {}).get('duration'))
+
+ subtitles = {}
+ automatic_captions = {}
+
+ formats = []
+
+ def extract_output_format(src, f_id):
+ return {
+ 'url': src.get('url'),
+ 'format_id': '%sp' % (src.get('height') or f_id),
+ 'width': int_or_none(src.get('width')),
+ 'height': int_or_none(src.get('height')),
+ 'vbr': int_or_none(src.get('video_bitrate_in_kbps')),
+ 'vcodec': src.get('video_codec'),
+ 'fps': int_or_none(src.get('frame_rate')),
+ 'abr': int_or_none(src.get('audio_bitrate_in_kbps')),
+ 'acodec': src.get('audio_codec'),
+ 'asr': int_or_none(src.get('audio_sample_rate')),
+ 'tbr': int_or_none(src.get('total_bitrate_in_kbps')),
+ 'filesize': int_or_none(src.get('file_size_in_bytes')),
+ }
+
+ outputs = asset.get('data', {}).get('outputs')
+ if not isinstance(outputs, dict):
+ outputs = {}
+
+ def add_output_format_meta(f, key):
+ output = outputs.get(key)
+ if isinstance(output, dict):
+ output_format = extract_output_format(output, key)
+ output_format.update(f)
+ return output_format
+ return f
+
+ def extract_formats(source_list):
+ if not isinstance(source_list, list):
+ return
+ for source in source_list:
+ video_url = url_or_none(source.get('file') or source.get('src'))
+ if not video_url:
+ continue
+ if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ continue
+ format_id = source.get('label')
+ f = {
+ 'url': video_url,
+ 'format_id': '%sp' % format_id,
+ 'height': int_or_none(format_id),
+ }
+ if format_id:
+ # Some videos contain additional metadata (e.g.
+ # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208)
+ f = add_output_format_meta(f, format_id)
+ formats.append(f)
+
+ def extract_subtitles(track_list):
+ if not isinstance(track_list, list):
+ return
+ for track in track_list:
+ if not isinstance(track, dict):
+ continue
+ if track.get('kind') != 'captions':
+ continue
+ src = url_or_none(track.get('src'))
+ if not src:
+ continue
+ lang = track.get('language') or track.get(
+ 'srclang') or track.get('label')
+ sub_dict = automatic_captions if track.get(
+ 'autogenerated') is True else subtitles
+ sub_dict.setdefault(lang, []).append({
+ 'url': src,
+ })
+
+ for url_kind in ('download', 'stream'):
+ urls = asset.get('%s_urls' % url_kind)
+ if isinstance(urls, dict):
+ extract_formats(urls.get('Video'))
+
+ captions = asset.get('captions')
+ if isinstance(captions, list):
+ for cc in captions:
+ if not isinstance(cc, dict):
+ continue
+ cc_url = url_or_none(cc.get('url'))
+ if not cc_url:
+ continue
+ lang = try_get(cc, lambda x: x['locale']['locale'], compat_str)
+ sub_dict = (automatic_captions if cc.get('source') == 'auto'
+ else subtitles)
+ sub_dict.setdefault(lang or 'en', []).append({
+ 'url': cc_url,
+ })
+
+ view_html = lecture.get('view_html')
+ if view_html:
+ view_html_urls = set()
+ for source in re.findall(r'<source[^>]+>', view_html):
+ attributes = extract_attributes(source)
+ src = attributes.get('src')
+ if not src:
+ continue
+ res = attributes.get('data-res')
+ height = int_or_none(res)
+ if src in view_html_urls:
+ continue
+ view_html_urls.add(src)
+ if attributes.get('type') == 'application/x-mpegURL' or determine_ext(src) == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ src, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False)
+ for f in m3u8_formats:
+ m = re.search(r'/hls_(?P<height>\d{3,4})_(?P<tbr>\d{2,})/', f['url'])
+ if m:
+ if not f.get('height'):
+ f['height'] = int(m.group('height'))
+ if not f.get('tbr'):
+ f['tbr'] = int(m.group('tbr'))
+ formats.extend(m3u8_formats)
+ else:
+ formats.append(add_output_format_meta({
+ 'url': src,
+ 'format_id': '%dp' % height if height else None,
+ 'height': height,
+ }, res))
+
+ # React rendition since 2017.04.15 (see
+ # https://github.com/ytdl-org/youtube-dl/issues/12744)
+ data = self._parse_json(
+ self._search_regex(
+ r'videojs-setup-data=(["\'])(?P<data>{.+?})\1', view_html,
+ 'setup data', default='{}', group='data'), video_id,
+ transform_source=unescapeHTML, fatal=False)
+ if data and isinstance(data, dict):
+ extract_formats(data.get('sources'))
+ if not duration:
+ duration = int_or_none(data.get('duration'))
+ extract_subtitles(data.get('tracks'))
+
+ if not subtitles and not automatic_captions:
+ text_tracks = self._parse_json(
+ self._search_regex(
+ r'text-tracks=(["\'])(?P<data>\[.+?\])\1', view_html,
+ 'text tracks', default='{}', group='data'), video_id,
+ transform_source=lambda s: js_to_json(unescapeHTML(s)),
+ fatal=False)
+ extract_subtitles(text_tracks)
+
+ if not formats and outputs:
+ for format_id, output in outputs.items():
+ f = extract_output_format(output, format_id)
+ if f.get('url'):
+ formats.append(f)
+
+ self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id'))
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'automatic_captions': automatic_captions,
+ }
+
+
+class UdemyCourseIE(UdemyIE):
+ IE_NAME = 'udemy:course'
+ _VALID_URL = r'https?://(?:[^/]+\.)?udemy\.com/(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://wipro.udemy.com/java-tutorial/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ course_path = self._match_id(url)
+
+ webpage = self._download_webpage(url, course_path)
+
+ course_id, title = self._extract_course_info(webpage, course_path)
+
+ self._enroll_course(url, webpage, course_id)
+
+ response = self._download_json(
+ 'https://www.udemy.com/api-2.0/courses/%s/cached-subscriber-curriculum-items' % course_id,
+ course_id, 'Downloading course curriculum', query={
+ 'fields[chapter]': 'title,object_index',
+ 'fields[lecture]': 'title,asset',
+ 'page_size': '1000',
+ })
+
+ entries = []
+ chapter, chapter_number = [None] * 2
+ for entry in response['results']:
+ clazz = entry.get('_class')
+ if clazz == 'lecture':
+ asset = entry.get('asset')
+ if isinstance(asset, dict):
+ asset_type = asset.get('asset_type') or asset.get('assetType')
+ if asset_type != 'Video':
+ continue
+ lecture_id = entry.get('id')
+ if lecture_id:
+ entry = {
+ '_type': 'url_transparent',
+ 'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, lecture_id),
+ 'title': entry.get('title'),
+ 'ie_key': UdemyIE.ie_key(),
+ }
+ if chapter_number:
+ entry['chapter_number'] = chapter_number
+ if chapter:
+ entry['chapter'] = chapter
+ entries.append(entry)
+ elif clazz == 'chapter':
+ chapter_number = entry.get('object_index')
+ chapter = entry.get('title')
+
+ return self.playlist_result(entries, course_id, title)
diff --git a/youtube_dl/extractor/udn.py b/youtube_dlc/extractor/udn.py
index 2c8e5c7b4..2c8e5c7b4 100644
--- a/youtube_dl/extractor/udn.py
+++ b/youtube_dlc/extractor/udn.py
diff --git a/youtube_dlc/extractor/ufctv.py b/youtube_dlc/extractor/ufctv.py
new file mode 100644
index 000000000..3d74ba071
--- /dev/null
+++ b/youtube_dlc/extractor/ufctv.py
@@ -0,0 +1,16 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .imggaming import ImgGamingBaseIE
+
+
+class UFCTVIE(ImgGamingBaseIE):
+ _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?(?:ufc\.tv|(?:ufc)?fightpass\.com)|ufcfightpass\.img(?:dge|gaming)\.com'
+ _NETRC_MACHINE = 'ufctv'
+ _REALM = 'ufc'
+
+
+class UFCArabiaIE(ImgGamingBaseIE):
+ _VALID_URL = ImgGamingBaseIE._VALID_URL_TEMPL % r'(?:(?:app|www)\.)?ufcarabia\.(?:ae|com)'
+ _NETRC_MACHINE = 'ufcarabia'
+ _REALM = 'admufc'
diff --git a/youtube_dl/extractor/uktvplay.py b/youtube_dlc/extractor/uktvplay.py
index 2137502a1..2137502a1 100644
--- a/youtube_dl/extractor/uktvplay.py
+++ b/youtube_dlc/extractor/uktvplay.py
diff --git a/youtube_dl/extractor/umg.py b/youtube_dlc/extractor/umg.py
index d815cd9a6..d815cd9a6 100644
--- a/youtube_dl/extractor/umg.py
+++ b/youtube_dlc/extractor/umg.py
diff --git a/youtube_dl/extractor/unistra.py b/youtube_dlc/extractor/unistra.py
index a724cdbef..a724cdbef 100644
--- a/youtube_dl/extractor/unistra.py
+++ b/youtube_dlc/extractor/unistra.py
diff --git a/youtube_dl/extractor/unity.py b/youtube_dlc/extractor/unity.py
index 73daacf29..73daacf29 100644
--- a/youtube_dl/extractor/unity.py
+++ b/youtube_dlc/extractor/unity.py
diff --git a/youtube_dlc/extractor/uol.py b/youtube_dlc/extractor/uol.py
new file mode 100644
index 000000000..628adf219
--- /dev/null
+++ b/youtube_dlc/extractor/uol.py
@@ -0,0 +1,144 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse_urlencode,
+)
+from ..utils import (
+ clean_html,
+ int_or_none,
+ parse_duration,
+ parse_iso8601,
+ qualities,
+ update_url_query,
+)
+
+
+class UOLIE(InfoExtractor):
+ IE_NAME = 'uol.com.br'
+ _VALID_URL = r'https?://(?:.+?\.)?uol\.com\.br/.*?(?:(?:mediaId|v)=|view/(?:[a-z0-9]+/)?|video(?:=|/(?:\d{4}/\d{2}/\d{2}/)?))(?P<id>\d+|[\w-]+-[A-Z0-9]+)'
+ _TESTS = [{
+ 'url': 'http://player.mais.uol.com.br/player_video_v3.swf?mediaId=15951931',
+ 'md5': '4f1e26683979715ff64e4e29099cf020',
+ 'info_dict': {
+ 'id': '15951931',
+ 'ext': 'mp4',
+ 'title': 'Miss simpatia é encontrada morta',
+ 'description': 'md5:3f8c11a0c0556d66daf7e5b45ef823b2',
+ 'timestamp': 1470421860,
+ 'upload_date': '20160805',
+ }
+ }, {
+ 'url': 'http://tvuol.uol.com.br/video/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+ 'md5': '2850a0e8dfa0a7307e04a96c5bdc5bc2',
+ 'info_dict': {
+ 'id': '15954259',
+ 'ext': 'mp4',
+ 'title': 'Incêndio destrói uma das maiores casas noturnas de Londres',
+ 'description': 'Em Londres, um incêndio destruiu uma das maiores boates da cidade. Não há informações sobre vítimas.',
+ 'timestamp': 1470674520,
+ 'upload_date': '20160808',
+ }
+ }, {
+ 'url': 'http://mais.uol.com.br/static/uolplayer/index.html?mediaId=15951931',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/15954259',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.band.uol.com.br/brasilurgente/video/2016/08/05/15951931/miss-simpatia-e-encontrada-morta.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videos.band.uol.com.br/programa.asp?e=noticias&pr=brasil-urgente&v=15951931&t=Policia-desmonte-base-do-PCC-na-Cracolandia',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/cphaa0gl2x8r/incendio-destroi-uma-das-maiores-casas-noturnas-de-londres-04024E9A3268D4C95326',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://noticias.uol.com.br//videos/assistir.htm?video=rafaela-silva-inspira-criancas-no-judo-04024D983968D4C95326',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://mais.uol.com.br/view/e0qbgxid79uv/15275470',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video_data = self._download_json(
+ # https://api.mais.uol.com.br/apiuol/v4/player/data/[MEDIA_ID]
+ 'https://api.mais.uol.com.br/apiuol/v3/media/detail/' + video_id,
+ video_id)['item']
+ media_id = compat_str(video_data['mediaId'])
+ title = video_data['title']
+ ver = video_data.get('revision', 2)
+
+ uol_formats = self._download_json(
+ 'https://croupier.mais.uol.com.br/v3/formats/%s/jsonp' % media_id,
+ media_id)
+ quality = qualities(['mobile', 'WEBM', '360p', '720p', '1080p'])
+ formats = []
+ for format_id, f in uol_formats.items():
+ if not isinstance(f, dict):
+ continue
+ f_url = f.get('url') or f.get('secureUrl')
+ if not f_url:
+ continue
+ query = {
+ 'ver': ver,
+ 'r': 'http://mais.uol.com.br',
+ }
+ for k in ('token', 'sign'):
+ v = f.get(k)
+ if v:
+ query[k] = v
+ f_url = update_url_query(f_url, query)
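+ # for HLS the signed query must reach both the playlist URL and every media segment URL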
+ if format_id == 'HLS':
+ m3u8_formats = self._extract_m3u8_formats(
+ f_url, media_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ encoded_query = compat_urllib_parse_urlencode(query)
+ for m3u8_f in m3u8_formats:
+ m3u8_f['extra_param_to_segment_url'] = encoded_query
+ m3u8_f['url'] = update_url_query(m3u8_f['url'], query)
+ formats.extend(m3u8_formats)
+ continue
+ formats.append({
+ 'format_id': format_id,
+ 'url': f_url,
+ 'quality': quality(format_id),
+ 'preference': -1,
+ })
+ self._sort_formats(formats)
+
+ tags = []
+ for tag in video_data.get('tags', []):
+ tag_description = tag.get('description')
+ if not tag_description:
+ continue
+ tags.append(tag_description)
+
+ thumbnails = []
+ for q in ('Small', 'Medium', 'Wmedium', 'Large', 'Wlarge', 'Xlarge'):
+ q_url = video_data.get('thumb' + q)
+ if not q_url:
+ continue
+ thumbnails.append({
+ 'id': q,
+ 'url': q_url,
+ })
+
+ return {
+ 'id': media_id,
+ 'title': title,
+ 'description': clean_html(video_data.get('description')),
+ 'thumbnails': thumbnails,
+ 'duration': parse_duration(video_data.get('duration')),
+ 'tags': tags,
+ 'formats': formats,
+ 'timestamp': parse_iso8601(video_data.get('publishDate'), ' '),
+ 'view_count': int_or_none(video_data.get('viewsQtty')),
+ }
diff --git a/youtube_dl/extractor/uplynk.py b/youtube_dlc/extractor/uplynk.py
index f06bf5b12..f06bf5b12 100644
--- a/youtube_dl/extractor/uplynk.py
+++ b/youtube_dlc/extractor/uplynk.py
diff --git a/youtube_dl/extractor/urort.py b/youtube_dlc/extractor/urort.py
index 8f6edab4b..8f6edab4b 100644
--- a/youtube_dl/extractor/urort.py
+++ b/youtube_dlc/extractor/urort.py
diff --git a/youtube_dl/extractor/urplay.py b/youtube_dlc/extractor/urplay.py
index 6030b7cb5..6030b7cb5 100644
--- a/youtube_dl/extractor/urplay.py
+++ b/youtube_dlc/extractor/urplay.py
diff --git a/youtube_dl/extractor/usanetwork.py b/youtube_dlc/extractor/usanetwork.py
index 54c7495cc..54c7495cc 100644
--- a/youtube_dl/extractor/usanetwork.py
+++ b/youtube_dlc/extractor/usanetwork.py
diff --git a/youtube_dl/extractor/usatoday.py b/youtube_dlc/extractor/usatoday.py
index b2103448d..b2103448d 100644
--- a/youtube_dl/extractor/usatoday.py
+++ b/youtube_dlc/extractor/usatoday.py
diff --git a/youtube_dl/extractor/ustream.py b/youtube_dlc/extractor/ustream.py
index 582090d0d..582090d0d 100644
--- a/youtube_dl/extractor/ustream.py
+++ b/youtube_dlc/extractor/ustream.py
diff --git a/youtube_dl/extractor/ustudio.py b/youtube_dlc/extractor/ustudio.py
index 56509beed..56509beed 100644
--- a/youtube_dl/extractor/ustudio.py
+++ b/youtube_dlc/extractor/ustudio.py
diff --git a/youtube_dl/extractor/varzesh3.py b/youtube_dlc/extractor/varzesh3.py
index f474ed73f..f474ed73f 100644
--- a/youtube_dl/extractor/varzesh3.py
+++ b/youtube_dlc/extractor/varzesh3.py
diff --git a/youtube_dl/extractor/vbox7.py b/youtube_dlc/extractor/vbox7.py
index 8152acefd..8152acefd 100644
--- a/youtube_dl/extractor/vbox7.py
+++ b/youtube_dlc/extractor/vbox7.py
diff --git a/youtube_dl/extractor/veehd.py b/youtube_dlc/extractor/veehd.py
index a6dc3c8d8..a6dc3c8d8 100644
--- a/youtube_dl/extractor/veehd.py
+++ b/youtube_dlc/extractor/veehd.py
diff --git a/youtube_dl/extractor/veoh.py b/youtube_dlc/extractor/veoh.py
index 1c44c145c..1c44c145c 100644
--- a/youtube_dl/extractor/veoh.py
+++ b/youtube_dlc/extractor/veoh.py
diff --git a/youtube_dl/extractor/vesti.py b/youtube_dlc/extractor/vesti.py
index 5ab716880..5ab716880 100644
--- a/youtube_dl/extractor/vesti.py
+++ b/youtube_dlc/extractor/vesti.py
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dlc/extractor/vevo.py
index 4ea9f1b4b..4ea9f1b4b 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dlc/extractor/vevo.py
diff --git a/youtube_dl/extractor/vgtv.py b/youtube_dlc/extractor/vgtv.py
index fe7a26b62..fe7a26b62 100644
--- a/youtube_dl/extractor/vgtv.py
+++ b/youtube_dlc/extractor/vgtv.py
diff --git a/youtube_dl/extractor/vh1.py b/youtube_dlc/extractor/vh1.py
index dff94a2b8..dff94a2b8 100644
--- a/youtube_dl/extractor/vh1.py
+++ b/youtube_dlc/extractor/vh1.py
diff --git a/youtube_dlc/extractor/vice.py b/youtube_dlc/extractor/vice.py
new file mode 100644
index 000000000..e37499512
--- /dev/null
+++ b/youtube_dlc/extractor/vice.py
@@ -0,0 +1,338 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import functools
+import hashlib
+import json
+import random
+import re
+import time
+
+from .adobepass import AdobePassIE
+from .common import InfoExtractor
+from .youtube import YoutubeIE
+from ..compat import (
+ compat_HTTPError,
+ compat_str,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ OnDemandPagedList,
+ parse_age_limit,
+ str_or_none,
+ try_get,
+)
+
+
+class ViceBaseIE(InfoExtractor):
+ def _call_api(self, resource, resource_key, resource_id, locale, fields, args=''):
+ return self._download_json(
+ 'https://video.vice.com/api/v1/graphql', resource_id, query={
+ 'query': '''{
+ %s(locale: "%s", %s: "%s"%s) {
+ %s
+ }
+}''' % (resource, locale, resource_key, resource_id, args, fields),
+ })['data'][resource]
+
+
+class ViceIE(ViceBaseIE, AdobePassIE):
+ IE_NAME = 'vice'
+ _VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
+ _TESTS = [{
+ 'url': 'https://video.vice.com/en_us/video/pet-cremator/58c69e38a55424f1227dc3f7',
+ 'info_dict': {
+ 'id': '58c69e38a55424f1227dc3f7',
+ 'ext': 'mp4',
+ 'title': '10 Questions You Always Wanted To Ask: Pet Cremator',
+ 'description': 'md5:fe856caacf61fe0e74fab15ce2b07ca5',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1489664942,
+ 'upload_date': '20170316',
+ 'age_limit': 14,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # geo restricted to US
+ 'url': 'https://video.vice.com/en_us/video/the-signal-from-tolva/5816510690b70e6c5fd39a56',
+ 'info_dict': {
+ 'id': '5816510690b70e6c5fd39a56',
+ 'ext': 'mp4',
+ 'uploader': 'vice',
+ 'title': 'The Signal From Tölva',
+ 'description': 'md5:3927e3c79f9e8094606a2b3c5b5e55d5',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1477941983,
+ 'upload_date': '20161031',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://video.vice.com/alps/video/ulfs-wien-beruchtigste-grafitti-crew-part-1/581b12b60a0e1f4c0fb6ea2f',
+ 'info_dict': {
+ 'id': '581b12b60a0e1f4c0fb6ea2f',
+ 'ext': 'mp4',
+ 'title': 'ULFs - Wien berüchtigste Grafitti Crew - Part 1',
+ 'description': 'Zwischen Hinterzimmer-Tattoos und U-Bahnschächten erzählen uns die Ulfs, wie es ist, "süchtig nach Sachbeschädigung" zu sein.',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1485368119,
+ 'upload_date': '20170125',
+ 'age_limit': 14,
+ },
+ 'params': {
+ # AES-encrypted m3u8
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://video.vice.com/en_us/embed/57f41d3556a0a80f54726060',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vms.vice.com/en_us/video/preplay/58c69e38a55424f1227dc3f7',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe\b[^>]+\bsrc=["\']((?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})',
+ webpage)
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = ViceIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ def _real_extract(self, url):
+ locale, video_id = re.match(self._VALID_URL, url).groups()
+
+ video = self._call_api('videos', 'id', video_id, locale, '''body
+ locked
+ rating
+ thumbnail_url
+ title''')[0]
+ title = video['title'].strip()
+ rating = video.get('rating')
+
+ query = {}
+ if video.get('locked'):
+ resource = self._get_mvpd_resource(
+ 'VICELAND', title, video_id, rating)
+ query['tvetoken'] = self._extract_mvpd_auth(
+ url, video_id, 'VICELAND', resource)
+
+ # the signature generation algorithm was reverse-engineered from signatureGenerator in
+ # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
+ # https://www.viceland.com/assets/common/js/web.vendor.bundle.js
+ # the newer JS lives at https://vice-web-statics-cdn.vice.com/vice-player/player-embed.js
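+ # the signed query expires 1440 seconds after issuance; sign = SHA-512 of "<video_id>:GET:<exp>"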
+ exp = int(time.time()) + 1440
+
+ query.update({
+ 'exp': exp,
+ 'sign': hashlib.sha512(('%s:GET:%d' % (video_id, exp)).encode()).hexdigest(),
+ 'skipadstitching': 1,
+ 'platform': 'desktop',
+ 'rn': random.randint(10000, 100000),
+ })
+
+ try:
+ preplay = self._download_json(
+ 'https://vms.vice.com/%s/video/preplay/%s' % (locale, video_id),
+ video_id, query=query)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401):
+ error = json.loads(e.cause.read().decode())
+ error_message = error.get('error_description') or error['details']
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, error_message), expected=True)
+ raise
+
+ video_data = preplay['video']
+ formats = self._extract_m3u8_formats(
+ preplay['playURL'], video_id, 'mp4', 'm3u8_native')
+ self._sort_formats(formats)
+ episode = video_data.get('episode') or {}
+ channel = video_data.get('channel') or {}
+ season = video_data.get('season') or {}
+
+ subtitles = {}
+ for subtitle in preplay.get('subtitleURLs', []):
+ cc_url = subtitle.get('url')
+ if not cc_url:
+ continue
+ language_code = try_get(subtitle, lambda x: x['languages'][0]['language_code'], compat_str) or 'en'
+ subtitles.setdefault(language_code, []).append({
+ 'url': cc_url,
+ })
+
+ return {
+ 'formats': formats,
+ 'id': video_id,
+ 'title': title,
+ 'description': clean_html(video.get('body')),
+ 'thumbnail': video.get('thumbnail_url'),
+ 'duration': int_or_none(video_data.get('video_duration')),
+ 'timestamp': int_or_none(video_data.get('created_at'), 1000),
+ 'age_limit': parse_age_limit(video_data.get('video_rating') or rating),
+ 'series': try_get(video_data, lambda x: x['show']['base']['display_title'], compat_str),
+ 'episode_number': int_or_none(episode.get('episode_number')),
+ 'episode_id': str_or_none(episode.get('id') or video_data.get('episode_id')),
+ 'season_number': int_or_none(season.get('season_number')),
+ 'season_id': str_or_none(season.get('id') or video_data.get('season_id')),
+ 'uploader': channel.get('name'),
+ 'uploader_id': str_or_none(channel.get('id')),
+ 'subtitles': subtitles,
+ }
+
+
+class ViceShowIE(ViceBaseIE):
+ IE_NAME = 'vice:show'
+ _VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
+ _PAGE_SIZE = 25
+ _TESTS = [{
+ 'url': 'https://video.vice.com/en_us/show/fck-thats-delicious',
+ 'info_dict': {
+ 'id': '57a2040c8cb727dec794c901',
+ 'title': 'F*ck, That’s Delicious',
+ 'description': 'The life and eating habits of rap’s greatest bon vivant, Action Bronson.',
+ },
+ 'playlist_mincount': 64,
+ }, {
+ 'url': 'https://www.vicetv.com/en_us/show/fck-thats-delicious',
+ 'only_matching': True,
+ }]
+
+ def _fetch_page(self, locale, show_id, page):
+ videos = self._call_api('videos', 'show_id', show_id, locale, '''body
+ id
+ url''', ', page: %d, per_page: %d' % (page + 1, self._PAGE_SIZE))
+ for video in videos:
+ yield self.url_result(
+ video['url'], ViceIE.ie_key(), video.get('id'))
+
+ def _real_extract(self, url):
+ locale, display_id = re.match(self._VALID_URL, url).groups()
+ show = self._call_api('shows', 'slug', display_id, locale, '''dek
+ id
+ title''')[0]
+ show_id = show['id']
+
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, locale, show_id),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(
+ entries, show_id, show.get('title'), show.get('dek'))
+
+
+class ViceArticleIE(ViceBaseIE):
+ IE_NAME = 'vice:article'
+ _VALID_URL = r'https://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'
+
+ _TESTS = [{
+ 'url': 'https://www.vice.com/en_us/article/on-set-with-the-woman-making-mormon-porn-in-utah',
+ 'info_dict': {
+ 'id': '58dc0a3dee202d2a0ccfcbd8',
+ 'ext': 'mp4',
+ 'title': 'Mormon War on Porn',
+ 'description': 'md5:1c5d91fe25fa8aa304f9def118b92dbf',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1491883129,
+ 'upload_date': '20170411',
+ 'age_limit': 17,
+ },
+ 'params': {
+ # AES-encrypted m3u8
+ 'skip_download': True,
+ },
+ 'add_ie': [ViceIE.ie_key()],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
+ 'md5': '13010ee0bc694ea87ec40724397c2349',
+ 'info_dict': {
+ 'id': '3jstaBeXgAs',
+ 'ext': 'mp4',
+ 'title': 'How to Hack a Car: Phreaked Out (Episode 2)',
+ 'description': 'md5:ee95453f7ff495db8efe14ae8bf56f30',
+ 'uploader': 'Motherboard',
+ 'uploader_id': 'MotherboardTV',
+ 'upload_date': '20140529',
+ },
+ 'add_ie': [YoutubeIE.ie_key()],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/znm9dx/karley-sciortino-slutever-reloaded',
+ 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
+ 'info_dict': {
+ 'id': '57f41d3556a0a80f54726060',
+ 'ext': 'mp4',
+ 'title': "Making The World's First Male Sex Doll",
+ 'description': 'md5:19b00b215b99961cf869c40fbe9df755',
+ 'uploader': 'vice',
+ 'uploader_id': '57a204088cb727dec794c67b',
+ 'timestamp': 1476919911,
+ 'upload_date': '20161019',
+ 'age_limit': 17,
+ },
+ 'params': {
+ 'skip_download': True,
+ 'format': 'bestvideo',
+ },
+ 'add_ie': [ViceIE.ie_key()],
+ }, {
+ 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ locale, display_id = re.match(self._VALID_URL, url).groups()
+
+ article = self._call_api('articles', 'slug', display_id, locale, '''body
+ embed_code''')[0]
+ body = article['body']
+
+ def _url_res(video_url, ie_key):
+ return {
+ '_type': 'url_transparent',
+ 'url': video_url,
+ 'display_id': display_id,
+ 'ie_key': ie_key,
+ }
+
+ vice_url = ViceIE._extract_url(body)
+ if vice_url:
+ return _url_res(vice_url, ViceIE.ie_key())
+
+ embed_code = self._search_regex(
+ r'embedCode=([^&\'"]+)', body,
+ 'ooyala embed code', default=None)
+ if embed_code:
+ return _url_res('ooyala:%s' % embed_code, 'Ooyala')
+
+ youtube_url = YoutubeIE._extract_url(body)
+ if youtube_url:
+ return _url_res(youtube_url, YoutubeIE.ie_key())
+
+ video_url = self._html_search_regex(
+ r'data-video-url="([^"]+)"',
+ article['embed_code'], 'video URL')
+
+ return _url_res(video_url, ViceIE.ie_key())
diff --git a/youtube_dl/extractor/vidbit.py b/youtube_dlc/extractor/vidbit.py
index 91f45b7cc..91f45b7cc 100644
--- a/youtube_dl/extractor/vidbit.py
+++ b/youtube_dlc/extractor/vidbit.py
diff --git a/youtube_dl/extractor/viddler.py b/youtube_dlc/extractor/viddler.py
index 642358433..642358433 100644
--- a/youtube_dl/extractor/viddler.py
+++ b/youtube_dlc/extractor/viddler.py
diff --git a/youtube_dlc/extractor/videa.py b/youtube_dlc/extractor/videa.py
new file mode 100644
index 000000000..a03614cc1
--- /dev/null
+++ b/youtube_dlc/extractor/videa.py
@@ -0,0 +1,167 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import random
+import string
+import struct
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_codecs,
+ xpath_element,
+ xpath_text,
+)
+from ..compat import (
+ compat_b64decode,
+ compat_ord,
+ compat_parse_qs,
+)
+
+
+class VideaIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ https?://
+ videa(?:kid)?\.hu/
+ (?:
+ videok/(?:[^/]+/)*[^?#&]+-|
+ player\?.*?\bv=|
+ player/v/
+ )
+ (?P<id>[^?#&]+)
+ '''
+ _TESTS = [{
+ 'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
+ 'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
+ 'info_dict': {
+ 'id': '8YfIAjxwWGwT8HVQ',
+ 'ext': 'mp4',
+ 'title': 'Az őrült kígyász 285 kígyót enged szabadon',
+ 'thumbnail': r're:^https?://.*',
+ 'duration': 21,
+ },
+ }, {
+ 'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [url for _, url in re.findall(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
+ webpage)]
+
+ def rc4(self, ciphertext, key):
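+ # plain RC4: key-scheduling (KSA) below, then the PRGA keystream is XORed over the ciphertext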
+ res = b''
+
+ key_len = len(key)
+ S = list(range(256))
+
+ j = 0
+ for i in range(256):
+ j = (j + S[i] + ord(key[i % key_len])) % 256
+ S[i], S[j] = S[j], S[i]
+
+ i = 0
+ j = 0
+ for m in range(len(ciphertext)):
+ i = (i + 1) % 256
+ j = (j + S[i]) % 256
+ S[i], S[j] = S[j], S[i]
+ k = S[(S[i] + S[j]) % 256]
+ res += struct.pack("B", k ^ compat_ord(ciphertext[m]))
+
+ return res
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id, fatal=True)
+ error = self._search_regex(r'<p class="error-text">([^<]+)</p>', webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ video_src_params_raw = self._search_regex(r'<iframe[^>]+id="videa_player_iframe"[^>]+src="/player\?([^"]+)"', webpage, 'video_src_params')
+ video_src_params = compat_parse_qs(video_src_params_raw)
+ player_page = self._download_webpage("https://videa.hu/videojs_player?%s" % video_src_params_raw, video_id, fatal=True)
+ nonce = self._search_regex(r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
+ random_seed = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(8))
+ static_secret = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
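+ # the 64-char nonce is split into two 32-char halves; the first half indexes into the
+ # static secret to unscramble the second, yielding the request token and the RC4 key stem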
+ l = nonce[:32]
+ s = nonce[32:]
+ result = ''
+ for i in range(32):
+ result += s[i - (static_secret.index(l[i]) - 31)]
+
+ video_src_params['_s'] = random_seed
+ video_src_params['_t'] = result[:16]
+ encryption_key_stem = result[16:] + random_seed
+
+ b64_info, handle = self._download_webpage_handle(
+ 'http://videa.hu/videaplayer_get_xml.php', video_id,
+ query=video_src_params, fatal=True)
+
+ encrypted_info = compat_b64decode(b64_info)
+ key = encryption_key_stem + handle.info()['x-videa-xs']
+ info_str = self.rc4(encrypted_info, key).decode('utf8')
+ info = self._parse_xml(info_str, video_id)
+
+ video = xpath_element(info, './/video', 'video', fatal=True)
+ sources = xpath_element(info, './/video_sources', 'sources', fatal=True)
+ hash_values = xpath_element(info, './/hash_values', 'hash_values', fatal=True)
+
+ title = xpath_text(video, './title', fatal=True)
+
+ formats = []
+ for source in sources.findall('./video_source'):
+ source_url = source.text
+ if not source_url:
+ continue
+ source_url += '?md5=%s&expires=%s' % (hash_values.find('hash_value_%s' % source.get('name')).text, source.get('exp'))
+ f = parse_codecs(source.get('codecs'))
+ f.update({
+ 'url': source_url,
+ 'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
+ 'format_id': source.get('name'),
+ 'width': int_or_none(source.get('width')),
+ 'height': int_or_none(source.get('height')),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ thumbnail = xpath_text(video, './poster_src')
+ duration = int_or_none(xpath_text(video, './duration'))
+
+ age_limit = None
+ is_adult = xpath_text(video, './is_adult_content', default=None)
+ if is_adult:
+ age_limit = 18 if is_adult == '1' else 0
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dlc/extractor/videodetective.py b/youtube_dlc/extractor/videodetective.py
new file mode 100644
index 000000000..fe70db713
--- /dev/null
+++ b/youtube_dlc/extractor/videodetective.py
@@ -0,0 +1,29 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from .internetvideoarchive import InternetVideoArchiveIE
+
+
+class VideoDetectiveIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?videodetective\.com/[^/]+/[^/]+/(?P<id>\d+)'
+
+ _TEST = {
+ 'url': 'http://www.videodetective.com/movies/kick-ass-2/194487',
+ 'info_dict': {
+ 'id': '194487',
+ 'ext': 'mp4',
+ 'title': 'Kick-Ass 2',
+ 'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ query = 'customerid=69249&publishedid=' + video_id
+ return self.url_result(
+ InternetVideoArchiveIE._build_json_url(query),
+ ie=InternetVideoArchiveIE.ie_key())
diff --git a/youtube_dl/extractor/videofyme.py b/youtube_dlc/extractor/videofyme.py
index cd3f50a63..cd3f50a63 100644
--- a/youtube_dl/extractor/videofyme.py
+++ b/youtube_dlc/extractor/videofyme.py
diff --git a/youtube_dl/extractor/videomore.py b/youtube_dlc/extractor/videomore.py
index e3eda3327..e3eda3327 100644
--- a/youtube_dl/extractor/videomore.py
+++ b/youtube_dlc/extractor/videomore.py
diff --git a/youtube_dl/extractor/videopress.py b/youtube_dlc/extractor/videopress.py
index e5f964d39..e5f964d39 100644
--- a/youtube_dl/extractor/videopress.py
+++ b/youtube_dlc/extractor/videopress.py
diff --git a/youtube_dl/extractor/vidio.py b/youtube_dlc/extractor/vidio.py
index b48baf00b..b48baf00b 100644
--- a/youtube_dl/extractor/vidio.py
+++ b/youtube_dlc/extractor/vidio.py
diff --git a/youtube_dl/extractor/vidlii.py b/youtube_dlc/extractor/vidlii.py
index f4774256b..f4774256b 100644
--- a/youtube_dl/extractor/vidlii.py
+++ b/youtube_dlc/extractor/vidlii.py
diff --git a/youtube_dl/extractor/vidme.py b/youtube_dlc/extractor/vidme.py
index 174e69cd6..174e69cd6 100644
--- a/youtube_dl/extractor/vidme.py
+++ b/youtube_dlc/extractor/vidme.py
diff --git a/youtube_dlc/extractor/vidzi.py b/youtube_dlc/extractor/vidzi.py
new file mode 100644
index 000000000..4e79a0b84
--- /dev/null
+++ b/youtube_dlc/extractor/vidzi.py
@@ -0,0 +1,69 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ decode_packed_codes,
+ js_to_json,
+ NO_DEFAULT,
+ PACKED_CODES_RE,
+)
+
+
+class VidziIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?vidzi\.(?:tv|cc|si|nu)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+ _TESTS = [{
+ 'url': 'http://vidzi.tv/cghql9yq6emu.html',
+ 'md5': '4f16c71ca0c8c8635ab6932b5f3f1660',
+ 'info_dict': {
+ 'id': 'cghql9yq6emu',
+ 'ext': 'mp4',
+ 'title': 'youtube-dlc test video 1\\\\2\'3/4<5\\\\6ä7↭',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://vidzi.tv/embed-4z2yb0rzphe9-600x338.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vidzi.cc/cghql9yq6emu.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vidzi.si/rph9gztxj1et.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vidzi.nu/cghql9yq6emu.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://vidzi.tv/%s' % video_id, video_id)
+ title = self._html_search_regex(
+ r'(?s)<h2 class="video-title">(.*?)</h2>', webpage, 'title')
+
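+ # the jwplayer setup may sit in the page itself or inside P.A.C.K.E.R.-packed scripts, so try the raw page first, then each unpacked script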
+ codes = [webpage]
+ codes.extend([
+ decode_packed_codes(mobj.group(0)).replace('\\\'', '\'')
+ for mobj in re.finditer(PACKED_CODES_RE, webpage)])
+ for num, code in enumerate(codes, 1):
+ jwplayer_data = self._parse_json(
+ self._search_regex(
+ r'setup\(([^)]+)\)', code, 'jwplayer data',
+ default=NO_DEFAULT if num == len(codes) else '{}'),
+ video_id, transform_source=lambda s: js_to_json(
+ re.sub(r'\s*\+\s*window\[.+?\]', '', s)))
+ if jwplayer_data:
+ break
+
+ info_dict = self._parse_jwplayer_data(jwplayer_data, video_id, require_title=False)
+ info_dict['title'] = title
+
+ return info_dict
diff --git a/youtube_dl/extractor/vier.py b/youtube_dlc/extractor/vier.py
index dbd5ba9ba..dbd5ba9ba 100644
--- a/youtube_dl/extractor/vier.py
+++ b/youtube_dlc/extractor/vier.py
diff --git a/youtube_dlc/extractor/viewlift.py b/youtube_dlc/extractor/viewlift.py
new file mode 100644
index 000000000..d6b92b1c8
--- /dev/null
+++ b/youtube_dlc/extractor/viewlift.py
@@ -0,0 +1,251 @@
+from __future__ import unicode_literals
+
+import json
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+)
+
+
+class ViewLiftBaseIE(InfoExtractor):
+ _API_BASE = 'https://prod-api.viewlift.com/'
+ _DOMAINS_REGEX = r'(?:(?:main\.)?snagfilms|snagxtreme|funnyforfree|kiddovid|winnersview|(?:monumental|lax)sportsnetwork|vayafilm|failarmy|ftfnext|lnppass\.legapallacanestro|moviespree|app\.myoutdoortv|neoufitness|pflmma|theidentitytb)\.com|(?:hoichoi|app\.horseandcountry|kronon|marquee|supercrosslive)\.tv'
+ _SITE_MAP = {
+ 'ftfnext': 'lax',
+ 'funnyforfree': 'snagfilms',
+ 'hoichoi': 'hoichoitv',
+ 'kiddovid': 'snagfilms',
+ 'laxsportsnetwork': 'lax',
+ 'legapallacanestro': 'lnp',
+ 'marquee': 'marquee-tv',
+ 'monumentalsportsnetwork': 'monumental-network',
+ 'moviespree': 'bingeflix',
+ 'pflmma': 'pfl',
+ 'snagxtreme': 'snagfilms',
+ 'theidentitytb': 'tampabay',
+ 'vayafilm': 'snagfilms',
+ }
+ _TOKENS = {}
+
+ def _call_api(self, site, path, video_id, query):
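+ # one cached authorization token per site: sign in when netrc credentials exist, otherwise request an anonymous token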
+ token = self._TOKENS.get(site)
+ if not token:
+ token_query = {'site': site}
+ email, password = self._get_login_info(netrc_machine=site)
+ if email:
+ resp = self._download_json(
+ self._API_BASE + 'identity/signin', video_id,
+ 'Logging in', query=token_query, data=json.dumps({
+ 'email': email,
+ 'password': password,
+ }).encode())
+ else:
+ resp = self._download_json(
+ self._API_BASE + 'identity/anonymous-token', video_id,
+ 'Downloading authorization token', query=token_query)
+ self._TOKENS[site] = token = resp['authorizationToken']
+ return self._download_json(
+ self._API_BASE + path, video_id,
+ headers={'Authorization': token}, query=query)
+
+
+class ViewLiftEmbedIE(ViewLiftBaseIE):
+ IE_NAME = 'viewlift:embed'
+ _VALID_URL = r'https?://(?:(?:www|embed)\.)?(?P<domain>%s)/embed/player\?.*\bfilmId=(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})' % ViewLiftBaseIE._DOMAINS_REGEX
+ _TESTS = [{
+ 'url': 'http://embed.snagfilms.com/embed/player?filmId=74849a00-85a9-11e1-9660-123139220831&w=500',
+ 'md5': '2924e9215c6eff7a55ed35b72276bd93',
+ 'info_dict': {
+ 'id': '74849a00-85a9-11e1-9660-123139220831',
+ 'ext': 'mp4',
+ 'title': '#whilewewatch',
+ 'description': 'md5:b542bef32a6f657dadd0df06e26fb0c8',
+ 'timestamp': 1334350096,
+ 'upload_date': '20120413',
+ }
+ }, {
+ # invalid labels; 360p is better than 480p
+ 'url': 'http://www.snagfilms.com/embed/player?filmId=17ca0950-a74a-11e0-a92a-0026bb61d036',
+ 'md5': '882fca19b9eb27ef865efeeaed376a48',
+ 'info_dict': {
+ 'id': '17ca0950-a74a-11e0-a92a-0026bb61d036',
+ 'ext': 'mp4',
+ 'title': 'Life in Limbo',
+ },
+ 'skip': 'The video does not exist',
+ }, {
+ 'url': 'http://www.snagfilms.com/embed/player?filmId=0000014c-de2f-d5d6-abcf-ffef58af0017',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_url(webpage):
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:embed\.)?(?:%s)/embed/player.+?)\1' % ViewLiftBaseIE._DOMAINS_REGEX,
+ webpage)
+ if mobj:
+ return mobj.group('url')
+
+ def _real_extract(self, url):
+ domain, film_id = re.match(self._VALID_URL, url).groups()
+ site = domain.split('.')[-2]
+ if site in self._SITE_MAP:
+ site = self._SITE_MAP[site]
+ try:
+ content_data = self._call_api(
+ site, 'entitlement/video/status', film_id, {
+ 'id': film_id
+ })['video']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+ error_message = self._parse_json(e.cause.read().decode(), film_id).get('errorMessage')
+ if error_message == 'User does not have a valid subscription or has not purchased this content.':
+ self.raise_login_required()
+ raise ExtractorError(error_message, expected=True)
+ raise
+ gist = content_data['gist']
+ title = gist['title']
+ video_assets = content_data['streamingInfo']['videoAssets']
+
+ formats = []
+ mpeg_video_assets = video_assets.get('mpeg') or []
+ for video_asset in mpeg_video_assets:
+ video_asset_url = video_asset.get('url')
+ if not video_asset_url:
+ continue
+ bitrate = int_or_none(video_asset.get('bitrate'))
+ height = int_or_none(self._search_regex(
+ r'^_?(\d+)[pP]$', video_asset.get('renditionValue'),
+ 'height', default=None))
+ formats.append({
+ 'url': video_asset_url,
+ 'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
+ 'tbr': bitrate,
+ 'height': height,
+ 'vcodec': video_asset.get('codec'),
+ })
+
+ hls_url = video_assets.get('hls')
+ if hls_url:
+ formats.extend(self._extract_m3u8_formats(
+ hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+ self._sort_formats(formats, ('height', 'tbr', 'format_id'))
+
+ info = {
+ 'id': film_id,
+ 'title': title,
+ 'description': gist.get('description'),
+ 'thumbnail': gist.get('videoImageUrl'),
+ 'duration': int_or_none(gist.get('runtime')),
+ 'age_limit': parse_age_limit(content_data.get('parentalRating')),
+ 'timestamp': int_or_none(gist.get('publishDate'), 1000),
+ 'formats': formats,
+ }
+ for k in ('categories', 'tags'):
+ info[k] = [v['title'] for v in content_data.get(k, []) if v.get('title')]
+ return info
+
+
+class ViewLiftIE(ViewLiftBaseIE):
+ IE_NAME = 'viewlift'
+ _VALID_URL = r'https?://(?:www\.)?(?P<domain>%s)(?P<path>(?:/(?:films/title|show|(?:news/)?videos?|watch))?/(?P<id>[^?#]+))' % ViewLiftBaseIE._DOMAINS_REGEX
+ _TESTS = [{
+ 'url': 'http://www.snagfilms.com/films/title/lost_for_life',
+ 'md5': '19844f897b35af219773fd63bdec2942',
+ 'info_dict': {
+ 'id': '0000014c-de2f-d5d6-abcf-ffef58af0017',
+ 'display_id': 'lost_for_life',
+ 'ext': 'mp4',
+ 'title': 'Lost for Life',
+ 'description': 'md5:ea10b5a50405ae1f7b5269a6ec594102',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 4489,
+ 'categories': 'mincount:3',
+ 'age_limit': 14,
+ 'upload_date': '20150421',
+ 'timestamp': 1429656820,
+ }
+ }, {
+ 'url': 'http://www.snagfilms.com/show/the_world_cut_project/india',
+ 'md5': 'e6292e5b837642bbda82d7f8bf3fbdfd',
+ 'info_dict': {
+ 'id': '00000145-d75c-d96e-a9c7-ff5c67b20000',
+ 'display_id': 'the_world_cut_project/india',
+ 'ext': 'mp4',
+ 'title': 'India',
+ 'description': 'md5:5c168c5a8f4719c146aad2e0dfac6f5f',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 979,
+ 'timestamp': 1399478279,
+ 'upload_date': '20140507',
+ }
+ }, {
+ 'url': 'http://main.snagfilms.com/augie_alone/s_2_ep_12_love',
+ 'info_dict': {
+ 'id': '00000148-7b53-de26-a9fb-fbf306f70020',
+ 'display_id': 'augie_alone/s_2_ep_12_love',
+ 'ext': 'mp4',
+ 'title': 'S. 2 Ep. 12 - Love',
+ 'description': 'Augie finds love.',
+ 'thumbnail': r're:^https?://.*\.jpg',
+ 'duration': 107,
+ 'upload_date': '20141012',
+ 'timestamp': 1413129540,
+ 'age_limit': 17,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://main.snagfilms.com/films/title/the_freebie',
+ 'only_matching': True,
+ }, {
+ # Film is not playable in your area.
+ 'url': 'http://www.snagfilms.com/films/title/inside_mecca',
+ 'only_matching': True,
+ }, {
+ # Film is not available.
+ 'url': 'http://www.snagfilms.com/show/augie_alone/flirting',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.winnersview.com/videos/the-good-son',
+ 'only_matching': True,
+ }, {
+ # Was once a Kaltura embed
+ 'url': 'https://www.monumentalsportsnetwork.com/videos/john-carlson-postgame-2-25-15',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.marquee.tv/watch/sadlerswells-sacredmonsters',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ViewLiftEmbedIE.suitable(url) else super(ViewLiftIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ domain, path, display_id = re.match(self._VALID_URL, url).groups()
+ site = domain.split('.')[-2]
+ if site in self._SITE_MAP:
+ site = self._SITE_MAP[site]
+ modules = self._call_api(
+ site, 'content/pages', display_id, {
+ 'includeContent': 'true',
+ 'moduleOffset': 1,
+ 'path': path,
+ 'site': site,
+ })['modules']
+ film_id = next(m['contentData'][0]['gist']['id'] for m in modules if m.get('moduleType') == 'VideoDetailModule')
+ return {
+ '_type': 'url_transparent',
+ 'url': 'http://%s/embed/player?filmId=%s' % (domain, film_id),
+ 'id': film_id,
+ 'display_id': display_id,
+ 'ie_key': 'ViewLiftEmbed',
+ }
diff --git a/youtube_dl/extractor/viidea.py b/youtube_dlc/extractor/viidea.py
index a0abbae60..a0abbae60 100644
--- a/youtube_dl/extractor/viidea.py
+++ b/youtube_dlc/extractor/viidea.py
diff --git a/youtube_dlc/extractor/viki.py b/youtube_dlc/extractor/viki.py
new file mode 100644
index 000000000..9e4171237
--- /dev/null
+++ b/youtube_dlc/extractor/viki.py
@@ -0,0 +1,386 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import hmac
+import itertools
+import json
+import re
+import time
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ parse_age_limit,
+ parse_iso8601,
+ sanitized_Request,
+)
+
+
+class VikiBaseIE(InfoExtractor):
+ _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
+ _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
+ _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
+
+ _APP = '100005a'
+ _APP_VERSION = '2.2.5.1428709186'
+ _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
+
+ _GEO_BYPASS = False
+ _NETRC_MACHINE = 'viki'
+
+ _token = None
+
+ _ERRORS = {
+ 'geo': 'Sorry, this content is not available in your region.',
+ 'upcoming': 'Sorry, this content is not yet available.',
+ # 'paywall': 'paywall',
+ }
+
+ def _prepare_call(self, path, timestamp=None, post_data=None):
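+ # every API call is signed: HMAC-SHA1 of the query string with the app secret, appended as the "sig" parameter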
+ path += '?' if '?' not in path else '&'
+ if not timestamp:
+ timestamp = int(time.time())
+ query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
+ if self._token:
+ query += '&token=%s' % self._token
+ sig = hmac.new(
+ self._APP_SECRET.encode('ascii'),
+ query.encode('ascii'),
+ hashlib.sha1
+ ).hexdigest()
+ url = self._API_URL_TEMPLATE % (query, sig)
+ return sanitized_Request(
+ url, json.dumps(post_data).encode('utf-8')) if post_data else url
+
+ def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
+ resp = self._download_json(
+ self._prepare_call(path, timestamp, post_data), video_id, note, headers={'x-viki-app-ver': self._APP_VERSION}, expected_status=[200, 400, 404])
+
+ error = resp.get('error')
+ if error:
+ if error == 'invalid timestamp':
+ resp = self._download_json(
+ self._prepare_call(path, int(resp['current_timestamp']), post_data),
+ video_id, '%s (retry)' % note, headers={'x-viki-app-ver': self._APP_VERSION}, expected_status=[200, 400, 404])
+ error = resp.get('error')
+ if error:
+ self._raise_error(resp['error'])
+
+ return resp
+
+ def _raise_error(self, error):
+ raise ExtractorError(
+ '%s returned error: %s' % (self.IE_NAME, error),
+ expected=True)
+
+ def _check_errors(self, data):
+ for reason, status in data.get('blocking', {}).items():
+ if status and reason in self._ERRORS:
+ message = self._ERRORS[reason]
+ if reason == 'geo':
+ self.raise_geo_restricted(msg=message)
+ raise ExtractorError('%s said: %s' % (
+ self.IE_NAME, message), expected=True)
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_form = {
+ 'login_id': username,
+ 'password': password,
+ }
+
+ login = self._call_api(
+ 'sessions.json', None,
+ 'Logging in', post_data=login_form)
+
+ self._token = login.get('token')
+ if not self._token:
+ self.report_warning('Unable to get session token, login has probably failed')
+
+ @staticmethod
+ def dict_selection(dict_obj, preferred_key, allow_fallback=True):
+ if preferred_key in dict_obj:
+ return dict_obj.get(preferred_key)
+
+ if not allow_fallback:
+ return
+
+ filtered = [v for v in dict_obj.values() if v]
+ return filtered[0] if filtered else None
+
+
+class VikiIE(VikiBaseIE):
+ IE_NAME = 'viki'
+ _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
+ 'info_dict': {
+ 'id': '1023585v',
+ 'ext': 'mp4',
+ 'title': 'Heirs Episode 14',
+ 'uploader': 'SBS',
+ 'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
+ 'upload_date': '20131121',
+ 'age_limit': 13,
+ },
+ 'skip': 'Blocked in the US',
+ }, {
+ # clip
+ 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
+ 'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
+ 'info_dict': {
+ 'id': '1067139v',
+ 'ext': 'mp4',
+ 'title': "'The Avengers: Age of Ultron' Press Conference",
+ 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
+ 'duration': 352,
+ 'timestamp': 1430380829,
+ 'upload_date': '20150430',
+ 'uploader': 'Arirang TV',
+ 'like_count': int,
+ 'age_limit': 0,
+ }
+ }, {
+ 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
+ 'info_dict': {
+ 'id': '1048879v',
+ 'ext': 'mp4',
+ 'title': 'Ankhon Dekhi',
+ 'duration': 6512,
+ 'timestamp': 1408532356,
+ 'upload_date': '20140820',
+ 'uploader': 'Spuul',
+ 'like_count': int,
+ 'age_limit': 13,
+ },
+ 'skip': 'Blocked in the US',
+ }, {
+ # episode
+ 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
+ 'md5': '5fa476a902e902783ac7a4d615cdbc7a',
+ 'info_dict': {
+ 'id': '44699v',
+ 'ext': 'mp4',
+ 'title': 'Boys Over Flowers - Episode 1',
+ 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
+ 'duration': 4204,
+ 'timestamp': 1270496524,
+ 'upload_date': '20100405',
+ 'uploader': 'group8',
+ 'like_count': int,
+ 'age_limit': 13,
+ }
+ }, {
+ # youtube external
+ 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
+ 'md5': '63f8600c1da6f01b7640eee7eca4f1da',
+ 'info_dict': {
+ 'id': '50562v',
+ 'ext': 'webm',
+ 'title': 'Poor Nastya [COMPLETE] - Episode 1',
+ 'description': '',
+ 'duration': 606,
+ 'timestamp': 1274949505,
+ 'upload_date': '20101213',
+ 'uploader': 'ad14065n',
+ 'uploader_id': 'ad14065n',
+ 'like_count': int,
+ 'age_limit': 13,
+ }
+ }, {
+ 'url': 'http://www.viki.com/player/44699v',
+ 'only_matching': True,
+ }, {
+ # non-English description
+ 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
+ 'md5': '1713ae35df5a521b31f6dc40730e7c9c',
+ 'info_dict': {
+ 'id': '158036v',
+ 'ext': 'mp4',
+ 'uploader': 'I Planet Entertainment',
+ 'upload_date': '20111122',
+ 'timestamp': 1321985454,
+ 'description': 'md5:44b1e46619df3a072294645c770cef36',
+ 'title': 'Love In Magic',
+ 'age_limit': 13,
+ },
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ video = self._call_api(
+ 'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
+
+ self._check_errors(video)
+
+ title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
+ if not title:
+ title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
+ container_titles = video.get('container', {}).get('titles', {})
+ container_title = self.dict_selection(container_titles, 'en')
+ title = '%s - %s' % (container_title, title)
+
+ description = self.dict_selection(video.get('descriptions', {}), 'en')
+
+ duration = int_or_none(video.get('duration'))
+ timestamp = parse_iso8601(video.get('created_at'))
+ uploader = video.get('author')
+ like_count = int_or_none(video.get('likes', {}).get('count'))
+ age_limit = parse_age_limit(video.get('rating'))
+
+ thumbnails = []
+ for thumbnail_id, thumbnail in video.get('images', {}).items():
+ thumbnails.append({
+ 'id': thumbnail_id,
+ 'url': thumbnail.get('url'),
+ })
+
+ subtitles = {}
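+ # subtitle_completions lists the available languages; build signed srt and vtt endpoints for each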
+ for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
+ subtitles[subtitle_lang] = [{
+ 'ext': subtitles_format,
+ 'url': self._prepare_call(
+ 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
+ } for subtitles_format in ('srt', 'vtt')]
+
+ result = {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'like_count': like_count,
+ 'age_limit': age_limit,
+ 'thumbnails': thumbnails,
+ 'subtitles': subtitles,
+ }
+
+ streams = self._call_api(
+ 'videos/%s/streams.json' % video_id, video_id,
+ 'Downloading video streams JSON')
+
+ if 'external' in streams:
+ result.update({
+ '_type': 'url_transparent',
+ 'url': streams['external']['url'],
+ })
+ return result
+
+ formats = []
+ for format_id, stream_dict in streams.items():
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]$', format_id, 'height', default=None))
+ for protocol, format_dict in stream_dict.items():
+ # rtmps URLs do not seem to work
+ if protocol == 'rtmps':
+ continue
+ format_url = format_dict['url']
+ if format_id == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ format_url, video_id, 'mp4',
+ entry_protocol='m3u8_native',
+ m3u8_id='m3u8-%s' % protocol, fatal=False)
+ # Despite CODECS metadata in m3u8 all video-only formats
+ # are actually video+audio
+ for f in m3u8_formats:
+ if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
+ f['acodec'] = None
+ formats.extend(m3u8_formats)
+ elif format_url.startswith('rtmp'):
+ mobj = re.search(
+ r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
+ format_url)
+ if not mobj:
+ continue
+ formats.append({
+ 'format_id': 'rtmp-%s' % format_id,
+ 'ext': 'flv',
+ 'url': mobj.group('url'),
+ 'play_path': mobj.group('playpath'),
+ 'app': mobj.group('app'),
+ 'page_url': url,
+ })
+ else:
+ formats.append({
+ 'url': format_url,
+ 'format_id': '%s-%s' % (format_id, protocol),
+ 'height': height,
+ })
+ self._sort_formats(formats)
+
+ result['formats'] = formats
+ return result
+
+
+class VikiChannelIE(VikiBaseIE):
+ IE_NAME = 'viki:channel'
+ _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
+ _TESTS = [{
+ 'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
+ 'info_dict': {
+ 'id': '50c',
+ 'title': 'Boys Over Flowers',
+ 'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
+ },
+ 'playlist_mincount': 71,
+ }, {
+ 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
+ 'info_dict': {
+ 'id': '1354c',
+ 'title': 'Poor Nastya [COMPLETE]',
+ 'description': 'md5:05bf5471385aa8b21c18ad450e350525',
+ },
+ 'playlist_count': 127,
+ }, {
+ 'url': 'http://www.viki.com/news/24569c-showbiz-korea',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.viki.com/artists/2141c-shinee',
+ 'only_matching': True,
+ }]
+
+ _PER_PAGE = 25
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ channel = self._call_api(
+ 'containers/%s.json' % channel_id, channel_id,
+ 'Downloading channel JSON')
+
+ self._check_errors(channel)
+
+ title = self.dict_selection(channel['titles'], 'en')
+
+ description = self.dict_selection(channel['descriptions'], 'en')
+
+ entries = []
+ for video_type in ('episodes', 'clips', 'movies'):
+ for page_num in itertools.count(1):
+ page = self._call_api(
+ 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
+ % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
+ 'Downloading %s JSON page #%d' % (video_type, page_num))
+ for video in page['response']:
+ video_id = video['id']
+ entries.append(self.url_result(
+ 'https://www.viki.com/videos/%s' % video_id, 'Viki'))
+ if not page['pagination']['next']:
+ break
+
+ return self.playlist_result(entries, channel_id, title, description)
diff --git a/youtube_dlc/extractor/vimeo.py b/youtube_dlc/extractor/vimeo.py
new file mode 100644
index 000000000..9839657ca
--- /dev/null
+++ b/youtube_dlc/extractor/vimeo.py
@@ -0,0 +1,1129 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import base64
+import functools
+import json
+import re
+import itertools
+
+from .common import InfoExtractor
+from ..compat import (
+ compat_kwargs,
+ compat_HTTPError,
+ compat_str,
+ compat_urlparse,
+)
+from ..utils import (
+ clean_html,
+ determine_ext,
+ dict_get,
+ ExtractorError,
+ js_to_json,
+ int_or_none,
+ merge_dicts,
+ OnDemandPagedList,
+ parse_filesize,
+ RegexNotFoundError,
+ sanitized_Request,
+ smuggle_url,
+ std_headers,
+ str_or_none,
+ try_get,
+ unified_timestamp,
+ unsmuggle_url,
+ urlencode_postdata,
+ urljoin,
+ unescapeHTML,
+)
+
+
+class VimeoBaseInfoExtractor(InfoExtractor):
+ _NETRC_MACHINE = 'vimeo'
+ _LOGIN_REQUIRED = False
+ _LOGIN_URL = 'https://vimeo.com/log_in'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ if self._LOGIN_REQUIRED:
+ raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+ return
+ webpage = self._download_webpage(
+ self._LOGIN_URL, None, 'Downloading login page')
+ token, vuid = self._extract_xsrft_and_vuid(webpage)
+ data = {
+ 'action': 'login',
+ 'email': username,
+ 'password': password,
+ 'service': 'vimeo',
+ 'token': token,
+ }
+ self._set_vimeo_cookie('vuid', vuid)
+ try:
+ self._download_webpage(
+ self._LOGIN_URL, None, 'Logging in',
+ data=urlencode_postdata(data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ 'Referer': self._LOGIN_URL,
+ })
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 418:
+ raise ExtractorError(
+ 'Unable to log in: bad username or password',
+ expected=True)
+ raise ExtractorError('Unable to log in')
+
+ def _verify_video_password(self, url, video_id, webpage):
+ password = self._downloader.params.get('videopassword')
+ if password is None:
+ raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
+ token, vuid = self._extract_xsrft_and_vuid(webpage)
+ data = urlencode_postdata({
+ 'password': password,
+ 'token': token,
+ })
+ if url.startswith('http://'):
+ # Vimeo only supports HTTPS now, but the user may give an HTTP URL
+ url = url.replace('http://', 'https://')
+ password_request = sanitized_Request(url + '/password', data)
+ password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
+ password_request.add_header('Referer', url)
+ self._set_vimeo_cookie('vuid', vuid)
+ return self._download_webpage(
+ password_request, video_id,
+ 'Verifying the password', 'Wrong password')
+
+ def _extract_xsrft_and_vuid(self, webpage):
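+ # xsrft appears to be Vimeo's CSRF token and vuid a visitor id; both
+ # are embedded in the page markup and are required for the login and
+ # video-password POST requests above.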
+ xsrft = self._search_regex(
+ r'(?:(?P<q1>["\'])xsrft(?P=q1)\s*:|xsrft\s*[=:])\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
+ webpage, 'login token', group='xsrft')
+ vuid = self._search_regex(
+ r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
+ webpage, 'vuid', group='vuid')
+ return xsrft, vuid
+
+ def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
+ vimeo_config = self._search_regex(
+ r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
+ webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
+ if vimeo_config:
+ return self._parse_json(vimeo_config, video_id)
+
+ def _set_vimeo_cookie(self, name, value):
+ self._set_cookie('vimeo.com', name, value)
+
+ def _vimeo_sort_formats(self, formats):
+ # Bitrates are completely broken: a single m3u8 may contain entries in kbps
+ # and bps at the same time, with no actual units specified. This leads to
+ # wrong sorting.
+ self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id'))
+
+ def _parse_config(self, config, video_id):
+ video_data = config['video']
+ video_title = video_data['title']
+ live_event = video_data.get('live_event') or {}
+ is_live = live_event.get('status') == 'started'
+
+ formats = []
+ config_files = video_data.get('files') or config['request'].get('files', {})
+ for f in config_files.get('progressive', []):
+ video_url = f.get('url')
+ if not video_url:
+ continue
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'http-%s' % f.get('quality'),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'fps': int_or_none(f.get('fps')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ })
+
+ # TODO: fix handling of 308 status code returned for live archive manifest requests
+ sep_pattern = r'/sep/video/'
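+ # Manifest URLs containing /sep/video/ point at streams with separate
+ # audio and video; for those, probe both the original URL and a muxed
+ # /video/ variant so either layout ends up in the format list.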
+ for files_type in ('hls', 'dash'):
+ for cdn_name, cdn_data in config_files.get(files_type, {}).get('cdns', {}).items():
+ manifest_url = cdn_data.get('url')
+ if not manifest_url:
+ continue
+ format_id = '%s-%s' % (files_type, cdn_name)
+ sep_manifest_urls = []
+ if re.search(sep_pattern, manifest_url):
+ for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
+ sep_manifest_urls.append((format_id + suffix, re.sub(
+ sep_pattern, '/%s/' % repl, manifest_url)))
+ else:
+ sep_manifest_urls = [(format_id, manifest_url)]
+ for f_id, m_url in sep_manifest_urls:
+ if files_type == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ m_url, video_id, 'mp4',
+ 'm3u8' if is_live else 'm3u8_native', m3u8_id=f_id,
+ note='Downloading %s m3u8 information' % cdn_name,
+ fatal=False))
+ elif files_type == 'dash':
+ if 'json=1' in m_url:
+ real_m_url = (self._download_json(m_url, video_id, fatal=False) or {}).get('url')
+ if real_m_url:
+ m_url = real_m_url
+ mpd_formats = self._extract_mpd_formats(
+ m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
+ 'Downloading %s MPD information' % cdn_name,
+ fatal=False)
+ formats.extend(mpd_formats)
+
+ live_archive = live_event.get('archive') or {}
+ live_archive_source_url = live_archive.get('source_url')
+ if live_archive_source_url and live_archive.get('status') == 'done':
+ formats.append({
+ 'format_id': 'live-archive-source',
+ 'url': live_archive_source_url,
+ 'preference': 1,
+ })
+
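+ # Demote video-only and audio-only variants so that fully muxed
+ # formats sort above them.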
+ for f in formats:
+ if f.get('vcodec') == 'none':
+ f['preference'] = -50
+ elif f.get('acodec') == 'none':
+ f['preference'] = -40
+
+ subtitles = {}
+ text_tracks = config['request'].get('text_tracks')
+ if text_tracks:
+ for tt in text_tracks:
+ subtitles[tt['lang']] = [{
+ 'ext': 'vtt',
+ 'url': urljoin('https://vimeo.com', tt['url']),
+ }]
+
+ thumbnails = []
+ if not is_live:
+ for key, thumb in video_data.get('thumbs', {}).items():
+ thumbnails.append({
+ 'id': key,
+ 'width': int_or_none(key),
+ 'url': thumb,
+ })
+ thumbnail = video_data.get('thumbnail')
+ if thumbnail:
+ thumbnails.append({
+ 'url': thumbnail,
+ })
+
+ owner = video_data.get('owner') or {}
+ video_uploader_url = owner.get('url')
+
+ return {
+ 'id': str_or_none(video_data.get('id')) or video_id,
+ 'title': self._live_title(video_title) if is_live else video_title,
+ 'uploader': owner.get('name'),
+ 'uploader_id': video_uploader_url.split('/')[-1] if video_uploader_url else None,
+ 'uploader_url': video_uploader_url,
+ 'thumbnails': thumbnails,
+ 'duration': int_or_none(video_data.get('duration')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ 'is_live': is_live,
+ }
+
+ def _extract_original_format(self, url, video_id):
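+ # Query the download config for the original upload; files that are
+ # archived ('is_cold') or being restored ('is_defrosting') are skipped,
+ # presumably because their download_url is not yet usable.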
+ download_data = self._download_json(
+ url, video_id, fatal=False,
+ query={'action': 'load_download_config'},
+ headers={'X-Requested-With': 'XMLHttpRequest'})
+ if download_data:
+ source_file = download_data.get('source_file')
+ if isinstance(source_file, dict):
+ download_url = source_file.get('download_url')
+ if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
+ source_name = source_file.get('public_name', 'Original')
+ if self._is_valid_url(download_url, video_id, '%s video' % source_name):
+ ext = (try_get(
+ source_file, lambda x: x['extension'],
+ compat_str) or determine_ext(
+ download_url, None) or 'mp4').lower()
+ return {
+ 'url': download_url,
+ 'ext': ext,
+ 'width': int_or_none(source_file.get('width')),
+ 'height': int_or_none(source_file.get('height')),
+ 'filesize': parse_filesize(source_file.get('size')),
+ 'format_id': source_name,
+ 'preference': 1,
+ }
+
+
+class VimeoIE(VimeoBaseInfoExtractor):
+ """Information extractor for vimeo.com."""
+
+ # _VALID_URL matches Vimeo URLs
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ www|
+ player
+ )
+ \.
+ )?
+ vimeo(?:pro)?\.com/
+ (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+ (?:.*?/)?
+ (?:
+ (?:
+ play_redirect_hls|
+ moogaloop\.swf)\?clip_id=
+ )?
+ (?:videos?/)?
+ (?P<id>[0-9]+)
+ (?:/[\da-f]+)?
+ /?(?:[?&].*)?(?:[#].*)?$
+ '''
+ IE_NAME = 'vimeo'
+ _TESTS = [
+ {
+ 'url': 'http://vimeo.com/56015672#at=0',
+ 'md5': '8879b6cc097e987f02484baf890129e5',
+ 'info_dict': {
+ 'id': '56015672',
+ 'ext': 'mp4',
+ 'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
+ 'description': 'md5:2d3305bad981a06ff79f027f19865021',
+ 'timestamp': 1355990239,
+ 'upload_date': '20121220',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
+ 'uploader_id': 'user7108434',
+ 'uploader': 'Filippo Valsorda',
+ 'duration': 10,
+ 'license': 'by-sa',
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ },
+ {
+ 'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
+ 'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
+ 'note': 'Vimeo Pro video (#1197)',
+ 'info_dict': {
+ 'id': '68093876',
+ 'ext': 'mp4',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/openstreetmapus',
+ 'uploader_id': 'openstreetmapus',
+ 'uploader': 'OpenStreetMap US',
+ 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
+ 'description': 'md5:2c362968038d4499f4d79f88458590c1',
+ 'duration': 1595,
+ 'upload_date': '20130610',
+ 'timestamp': 1370893156,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ },
+ {
+ 'url': 'http://player.vimeo.com/video/54469442',
+ 'md5': '619b811a4417aa4abe78dc653becf511',
+ 'note': 'Videos that embed the url in the player page',
+ 'info_dict': {
+ 'id': '54469442',
+ 'ext': 'mp4',
+ 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
+ 'uploader': 'The BLN & Business of Software',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/theblnbusinessofsoftware',
+ 'uploader_id': 'theblnbusinessofsoftware',
+ 'duration': 3610,
+ 'description': None,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
+ 'url': 'http://vimeo.com/68375962',
+ 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
+ 'note': 'Video protected with password',
+ 'info_dict': {
+ 'id': '68375962',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl password protected test video',
+ 'timestamp': 1371200155,
+ 'upload_date': '20130614',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
+ 'uploader_id': 'user18948128',
+ 'uploader': 'Jaime Marquínez Ferrándiz',
+ 'duration': 10,
+ 'description': 'md5:dca3ea23adb29ee387127bc4ddfce63f',
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ 'videopassword': 'youtube-dl',
+ },
+ },
+ {
+ 'url': 'http://vimeo.com/channels/keypeele/75629013',
+ 'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
+ 'info_dict': {
+ 'id': '75629013',
+ 'ext': 'mp4',
+ 'title': 'Key & Peele: Terrorist Interrogation',
+ 'description': 'md5:8678b246399b070816b12313e8b4eb5c',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
+ 'uploader_id': 'atencio',
+ 'uploader': 'Peter Atencio',
+ 'channel_id': 'keypeele',
+ 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/keypeele',
+ 'timestamp': 1380339469,
+ 'upload_date': '20130928',
+ 'duration': 187,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
+ 'url': 'http://vimeo.com/76979871',
+ 'note': 'Video with subtitles',
+ 'info_dict': {
+ 'id': '76979871',
+ 'ext': 'mp4',
+ 'title': 'The New Vimeo Player (You Know, For Videos)',
+ 'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
+ 'timestamp': 1381846109,
+ 'upload_date': '20131015',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
+ 'uploader_id': 'staff',
+ 'uploader': 'Vimeo Staff',
+ 'duration': 62,
+ }
+ },
+ {
+ # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
+ 'url': 'https://player.vimeo.com/video/98044508',
+ 'note': 'The js code contains assignments to the same variable as the config',
+ 'info_dict': {
+ 'id': '98044508',
+ 'ext': 'mp4',
+ 'title': 'Pier Solar OUYA Official Trailer',
+ 'uploader': 'Tulio Gonçalves',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user28849593',
+ 'uploader_id': 'user28849593',
+ },
+ },
+ {
+ # contains original format
+ 'url': 'https://vimeo.com/33951933',
+ 'md5': '53c688fa95a55bf4b7293d37a89c5c53',
+ 'info_dict': {
+ 'id': '33951933',
+ 'ext': 'mp4',
+ 'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
+ 'uploader': 'The DMCI',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
+ 'uploader_id': 'dmci',
+ 'timestamp': 1324343742,
+ 'upload_date': '20111220',
+ 'description': 'md5:ae23671e82d05415868f7ad1aec21147',
+ },
+ },
+ {
+ # only available via https://vimeo.com/channels/tributes/6213729 and
+ # not via https://vimeo.com/6213729
+ 'url': 'https://vimeo.com/channels/tributes/6213729',
+ 'info_dict': {
+ 'id': '6213729',
+ 'ext': 'mp4',
+ 'title': 'Vimeo Tribute: The Shining',
+ 'uploader': 'Casey Donahue',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
+ 'uploader_id': 'caseydonahue',
+ 'channel_url': r're:https?://(?:www\.)?vimeo\.com/channels/tributes',
+ 'channel_id': 'tributes',
+ 'timestamp': 1250886430,
+ 'upload_date': '20090821',
+ 'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
+ # redirects to ondemand extractor and should be passed through it
+ # for successful extraction
+ 'url': 'https://vimeo.com/73445910',
+ 'info_dict': {
+ 'id': '73445910',
+ 'ext': 'mp4',
+ 'title': 'The Reluctant Revolutionary',
+ 'uploader': '10Ft Films',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/tenfootfilms',
+ 'uploader_id': 'tenfootfilms',
+ 'description': 'md5:0fa704e05b04f91f40b7f3ca2e801384',
+ 'upload_date': '20130830',
+ 'timestamp': 1377853339,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ },
+ {
+ 'url': 'http://player.vimeo.com/video/68375962',
+ 'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
+ 'info_dict': {
+ 'id': '68375962',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl password protected test video',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
+ 'uploader_id': 'user18948128',
+ 'uploader': 'Jaime Marquínez Ferrándiz',
+ 'duration': 10,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ 'videopassword': 'youtube-dl',
+ },
+ },
+ {
+ 'url': 'http://vimeo.com/moogaloop.swf?clip_id=2539741',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/109815029',
+ 'note': 'Video not completely processed, "failed" seed status',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/album/2632481/video/79010983',
+ 'only_matching': True,
+ },
+ {
+ # source file returns 403: Forbidden
+ 'url': 'https://vimeo.com/7809605',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://vimeo.com/160743502/abd0e13fb4',
+ 'only_matching': True,
+ }
+ # https://gettingthingsdone.com/workflowmap/
+ # vimeo embed with check-password page protected by Referer header
+ ]
+
+ @staticmethod
+ def _smuggle_referrer(url, referrer_url):
+ return smuggle_url(url, {'http_headers': {'Referer': referrer_url}})
+
+ @staticmethod
+ def _extract_urls(url, webpage):
+ urls = []
+ # Look for embedded (iframe) Vimeo player
+ for mobj in re.finditer(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
+ webpage):
+ urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
+ PLAIN_EMBED_RE = (
+ # Look for embedded (swf embed) Vimeo player
+ r'<embed[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)\1',
+ # Also look for non-standard embedded Vimeo players
+ r'<video[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vimeo\.com/[0-9]+)\1',
+ )
+ for embed_re in PLAIN_EMBED_RE:
+ for mobj in re.finditer(embed_re, webpage):
+ urls.append(mobj.group('url'))
+ return urls
+
+ @staticmethod
+ def _extract_url(url, webpage):
+ urls = VimeoIE._extract_urls(url, webpage)
+ return urls[0] if urls else None
+
+ def _verify_player_video_password(self, url, video_id, headers):
+ password = self._downloader.params.get('videopassword')
+ if password is None:
+ raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
+ data = urlencode_postdata({
+ 'password': base64.b64encode(password.encode()),
+ })
+ headers = merge_dicts(headers, {
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+ checked = self._download_json(
+ url + '/check-password', video_id,
+ 'Verifying the password', data=data, headers=headers)
+ if checked is False:
+ raise ExtractorError('Wrong video password', expected=True)
+ return checked
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
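+ # Embedding pages smuggle their own URL in via http_headers (see
+ # _smuggle_referrer); recover it here so embed-only videos are
+ # requested with the proper Referer.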
+ url, data = unsmuggle_url(url, {})
+ headers = std_headers.copy()
+ if 'http_headers' in data:
+ headers.update(data['http_headers'])
+ if 'Referer' not in headers:
+ headers['Referer'] = url
+
+ channel_id = self._search_regex(
+ r'vimeo\.com/channels/([^/]+)', url, 'channel id', default=None)
+
+ # Extract ID from URL
+ video_id = self._match_id(url)
+ orig_url = url
+ is_pro = 'vimeopro.com/' in url
+ is_player = '://player.vimeo.com/video/' in url
+ if is_pro:
+ # Some videos require portfolio_id to be present in the player URL
+ # https://github.com/ytdl-org/youtube-dl/issues/20070
+ url = self._extract_url(url, self._download_webpage(url, video_id))
+ if not url:
+ url = 'https://vimeo.com/' + video_id
+ elif is_player:
+ url = 'https://player.vimeo.com/video/' + video_id
+ elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')):
+ url = 'https://vimeo.com/' + video_id
+
+ try:
+ # Retrieve video webpage to extract further information
+ webpage, urlh = self._download_webpage_handle(
+ url, video_id, headers=headers)
+ redirect_url = urlh.geturl()
+ except ExtractorError as ee:
+ if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
+ errmsg = ee.cause.read()
+ if b'Because of its privacy settings, this video cannot be played here' in errmsg:
+ raise ExtractorError(
+ 'Cannot download embed-only video without embedding '
+ 'URL. Please call youtube-dlc with the URL of the page '
+ 'that embeds this video.',
+ expected=True)
+ raise
+
+ # Now we begin extracting as much information as we can from what we
+ # retrieved. First we extract the information common to all extractors,
+ # and later those that are Vimeo specific.
+ self.report_extraction(video_id)
+
+ vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
+ if vimeo_config:
+ seed_status = vimeo_config.get('seed_status', {})
+ if seed_status.get('state') == 'failed':
+ raise ExtractorError(
+ '%s said: %s' % (self.IE_NAME, seed_status['title']),
+ expected=True)
+
+ cc_license = None
+ timestamp = None
+ video_description = None
+
+ # Extract the config JSON
+ try:
+ try:
+ config_url = self._html_search_regex(
+ r' data-config-url="(.+?)"', webpage,
+ 'config URL', default=None)
+ if not config_url:
+ # Sometimes a new React-based page is served instead of the old one,
+ # which requires a different config URL extraction approach (see
+ # https://github.com/ytdl-org/youtube-dl/pull/7209)
+ page_config = self._parse_json(self._search_regex(
+ r'vimeo\.(?:clip|vod_title)_page_config\s*=\s*({.+?});',
+ webpage, 'page config'), video_id)
+ config_url = page_config['player']['config_url']
+ cc_license = page_config.get('cc_license')
+ timestamp = try_get(
+ page_config, lambda x: x['clip']['uploaded_on'],
+ compat_str)
+ video_description = clean_html(dict_get(
+ page_config, ('description', 'description_html_escaped')))
+ config = self._download_json(config_url, video_id)
+ except RegexNotFoundError:
+ # For pro videos or player.vimeo.com URLs
+ # we try to find the variable to which the config dict is assigned
+ m_variable_name = re.search(r'(\w)\.video\.id', webpage)
+ if m_variable_name is not None:
+ config_re = [r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))]
+ else:
+ config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
+ config_re.append(r'\bvar\s+r\s*=\s*({.+?})\s*;')
+ config_re.append(r'\bconfig\s*=\s*({.+?})\s*;')
+ config = self._search_regex(config_re, webpage, 'info section',
+ flags=re.DOTALL)
+ config = json.loads(config)
+ except Exception as e:
+ if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
+ raise ExtractorError('The author has restricted access to this video; try using the "--referer" option')
+
+ if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
+ if '_video_password_verified' in data:
+ raise ExtractorError('Video password verification failed!')
+ self._verify_video_password(redirect_url, video_id, webpage)
+ return self._real_extract(
+ smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
+ else:
+ raise ExtractorError('Unable to extract info section',
+ cause=e)
+ else:
+ if config.get('view') == 4:
+ config = self._verify_player_video_password(redirect_url, video_id, headers)
+
+ vod = config.get('video', {}).get('vod', {})
+
+ def is_rented():
+ if '>You rented this title.<' in webpage:
+ return True
+ if config.get('user', {}).get('purchased'):
+ return True
+ for purchase_option in vod.get('purchase_options', []):
+ if purchase_option.get('purchased'):
+ return True
+ label = purchase_option.get('label_string')
+ if label and (label.startswith('You rented this') or label.endswith(' remaining')):
+ return True
+ return False
+
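+ # If the URL points at a trailer but the VOD itself has been rented,
+ # redirect to the actual feature; force_feature_id guards against
+ # redirecting in a loop.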
+ if is_rented() and vod.get('is_trailer'):
+ feature_id = vod.get('feature_id')
+ if feature_id and not data.get('force_feature_id', False):
+ return self.url_result(smuggle_url(
+ 'https://player.vimeo.com/player/%s' % feature_id,
+ {'force_feature_id': True}), 'Vimeo')
+
+ # Extract video description
+ if not video_description:
+ video_description = self._html_search_regex(
+ r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
+ webpage, 'description', default=None)
+ if not video_description:
+ video_description = self._html_search_meta(
+ 'description', webpage, default=None)
+ if not video_description and is_pro:
+ orig_webpage = self._download_webpage(
+ orig_url, video_id,
+ note='Downloading webpage for description',
+ fatal=False)
+ if orig_webpage:
+ video_description = self._html_search_meta(
+ 'description', orig_webpage, default=None)
+ if not video_description and not is_player:
+ self._downloader.report_warning('Cannot find video description')
+
+ # Extract upload date
+ if not timestamp:
+ timestamp = self._search_regex(
+ r'<time[^>]+datetime="([^"]+)"', webpage,
+ 'timestamp', default=None)
+
+ try:
+ view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
+ like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
+ comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
+ except RegexNotFoundError:
+ # This info is only available in vimeo.com/{id} URLs
+ view_count = None
+ like_count = None
+ comment_count = None
+
+ formats = []
+
+ source_format = self._extract_original_format(
+ 'https://vimeo.com/' + video_id, video_id)
+ if source_format:
+ formats.append(source_format)
+
+ info_dict_config = self._parse_config(config, video_id)
+ formats.extend(info_dict_config['formats'])
+ self._vimeo_sort_formats(formats)
+
+ json_ld = self._search_json_ld(webpage, video_id, default={})
+
+ if not cc_license:
+ cc_license = self._search_regex(
+ r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
+ webpage, 'license', default=None, group='license')
+
+ channel_url = 'https://vimeo.com/channels/%s' % channel_id if channel_id else None
+
+ info_dict = {
+ 'formats': formats,
+ 'timestamp': unified_timestamp(timestamp),
+ 'description': video_description,
+ 'webpage_url': url,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'comment_count': comment_count,
+ 'license': cc_license,
+ 'channel_id': channel_id,
+ 'channel_url': channel_url,
+ }
+
+ info_dict = merge_dicts(info_dict, info_dict_config, json_ld)
+
+ return info_dict
+
+
+class VimeoOndemandIE(VimeoIE):
+ IE_NAME = 'vimeo:ondemand'
+ _VALID_URL = r'https?://(?:www\.)?vimeo\.com/ondemand/([^/]+/)?(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ # ondemand video not available via https://vimeo.com/id
+ 'url': 'https://vimeo.com/ondemand/20704',
+ 'md5': 'c424deda8c7f73c1dfb3edd7630e2f35',
+ 'info_dict': {
+ 'id': '105442900',
+ 'ext': 'mp4',
+ 'title': 'המעבדה - במאי יותם פלדמן',
+ 'uploader': 'גם סרטים',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/gumfilms',
+ 'uploader_id': 'gumfilms',
+ 'description': 'md5:4c027c965e439de4baab621e48b60791',
+ 'upload_date': '20140906',
+ 'timestamp': 1410032453,
+ },
+ 'params': {
+ 'format': 'best[protocol=https]',
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ # requires Referer to be passed along with og:video:url
+ 'url': 'https://vimeo.com/ondemand/36938/126682985',
+ 'info_dict': {
+ 'id': '126584684',
+ 'ext': 'mp4',
+ 'title': 'Rävlock, rätt läte på rätt plats',
+ 'uploader': 'Lindroth & Norin',
+ 'uploader_url': r're:https?://(?:www\.)?vimeo\.com/lindrothnorin',
+ 'uploader_id': 'lindrothnorin',
+ 'description': 'md5:c3c46a90529612c8279fb6af803fc0df',
+ 'upload_date': '20150502',
+ 'timestamp': 1430586422,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ 'url': 'https://vimeo.com/ondemand/nazmaalik',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vimeo.com/ondemand/141692381',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://vimeo.com/ondemand/thelastcolony/150274832',
+ 'only_matching': True,
+ }]
+
+
+class VimeoChannelIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:channel'
+ _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
+ _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
+ _TITLE = None
+ _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/channels/tributes',
+ 'info_dict': {
+ 'id': 'tributes',
+ 'title': 'Vimeo Tributes',
+ },
+ 'playlist_mincount': 25,
+ }]
+ _BASE_URL_TEMPL = 'https://vimeo.com/channels/%s'
+
+ def _page_url(self, base_url, pagenum):
+ return '%s/videos/page:%d/' % (base_url, pagenum)
+
+ def _extract_list_title(self, webpage):
+ return self._TITLE or self._html_search_regex(
+ self._TITLE_RE, webpage, 'list title', fatal=False)
+
+ def _title_and_entries(self, list_id, base_url):
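+ # Generator contract: the first yielded item is the list title, all
+ # subsequent items are video entries (consumed that way by
+ # _extract_videos below).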
+ for pagenum in itertools.count(1):
+ page_url = self._page_url(base_url, pagenum)
+ webpage = self._download_webpage(
+ page_url, list_id,
+ 'Downloading page %s' % pagenum)
+
+ if pagenum == 1:
+ yield self._extract_list_title(webpage)
+
+ # Try extracting href first, since not all videos are available via the
+ # short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
+ clips = re.findall(
+ r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
+ if clips:
+ for video_id, video_url, video_title in clips:
+ yield self.url_result(
+ compat_urlparse.urljoin(base_url, video_url),
+ VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
+ # More relaxed fallback
+ else:
+ for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
+ yield self.url_result(
+ 'https://vimeo.com/%s' % video_id,
+ VimeoIE.ie_key(), video_id=video_id)
+
+ if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
+ break
+
+ def _extract_videos(self, list_id, base_url):
+ title_and_entries = self._title_and_entries(list_id, base_url)
+ list_title = next(title_and_entries)
+ return self.playlist_result(title_and_entries, list_id, list_title)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+ return self._extract_videos(channel_id, self._BASE_URL_TEMPL % channel_id)
+
+
+class VimeoUserIE(VimeoChannelIE):
+ IE_NAME = 'vimeo:user'
+ _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<id>[^/]+)(?:/videos|[#?]|$)'
+ _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/nkistudio/videos',
+ 'info_dict': {
+ 'title': 'Nki',
+ 'id': 'nkistudio',
+ },
+ 'playlist_mincount': 66,
+ }]
+ _BASE_URL_TEMPL = 'https://vimeo.com/%s'
+
+
+class VimeoAlbumIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:album'
+ _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
+ _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/album/2632481',
+ 'info_dict': {
+ 'id': '2632481',
+ 'title': 'Staff Favorites: November 2013',
+ },
+ 'playlist_mincount': 13,
+ }, {
+ 'note': 'Password-protected album',
+ 'url': 'https://vimeo.com/album/3253534',
+ 'info_dict': {
+ 'title': 'test',
+ 'id': '3253534',
+ },
+ 'playlist_count': 1,
+ 'params': {
+ 'videopassword': 'youtube-dl',
+ }
+ }]
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, album_id, authorization, hashed_pass, page):
+ api_page = page + 1
+ query = {
+ 'fields': 'link,uri',
+ 'page': api_page,
+ 'per_page': self._PAGE_SIZE,
+ }
+ if hashed_pass:
+ query['_hashed_pass'] = hashed_pass
+ videos = self._download_json(
+ 'https://api.vimeo.com/albums/%s/videos' % album_id,
+ album_id, 'Downloading page %d' % api_page, query=query, headers={
+ 'Authorization': 'jwt ' + authorization,
+ })['data']
+ for video in videos:
+ link = video.get('link')
+ if not link:
+ continue
+ uri = video.get('uri')
+ video_id = self._search_regex(r'/videos/(\d+)', uri, 'video_id', default=None) if uri else None
+ yield self.url_result(link, VimeoIE.ie_key(), video_id)
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+ webpage = self._download_webpage(url, album_id)
+ viewer = self._parse_json(self._search_regex(
+ r'bootstrap_data\s*=\s*({.+?})</script>',
+ webpage, 'bootstrap data'), album_id)['viewer']
+ jwt = viewer['jwt']
+ album = self._download_json(
+ 'https://api.vimeo.com/albums/' + album_id,
+ album_id, headers={'Authorization': 'jwt ' + jwt},
+ query={'fields': 'description,name,privacy'})
+ hashed_pass = None
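+ # Password-protected showcases: exchange the album password for a
+ # hashed_pass token via the auth endpoint, which _fetch_page then
+ # passes along as the _hashed_pass query parameter.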
+ if try_get(album, lambda x: x['privacy']['view']) == 'password':
+ password = self._downloader.params.get('videopassword')
+ if not password:
+ raise ExtractorError(
+ 'This album is protected by a password, use the --video-password option',
+ expected=True)
+ self._set_vimeo_cookie('vuid', viewer['vuid'])
+ try:
+ hashed_pass = self._download_json(
+ 'https://vimeo.com/showcase/%s/auth' % album_id,
+ album_id, 'Verifying the password', data=urlencode_postdata({
+ 'password': password,
+ 'token': viewer['xsrft'],
+ }), headers={
+ 'X-Requested-With': 'XMLHttpRequest',
+ })['hashed_pass']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+ raise ExtractorError('Wrong password', expected=True)
+ raise
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, album_id, jwt, hashed_pass), self._PAGE_SIZE)
+ return self.playlist_result(
+ entries, album_id, album.get('name'), album.get('description'))
+
+
+class VimeoGroupsIE(VimeoChannelIE):
+ IE_NAME = 'vimeo:group'
+ _VALID_URL = r'https://vimeo\.com/groups/(?P<id>[^/]+)(?:/(?!videos?/\d+)|$)'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/groups/kattykay',
+ 'info_dict': {
+ 'id': 'kattykay',
+ 'title': 'Katty Kay',
+ },
+ 'playlist_mincount': 27,
+ }]
+ _BASE_URL_TEMPL = 'https://vimeo.com/groups/%s'
+
+
+class VimeoReviewIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vimeo:review'
+ IE_DESC = 'Review pages on vimeo'
+ _VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
+ 'md5': 'c507a72f780cacc12b2248bb4006d253',
+ 'info_dict': {
+ 'id': '75524534',
+ 'ext': 'mp4',
+ 'title': "DICK HARDWICK 'Comedian'",
+ 'uploader': 'Richard Hardwick',
+ 'uploader_id': 'user21297594',
+ 'description': "Comedian Dick Hardwick's five minute demo filmed in front of a live theater audience.\nEdit by Doug Mattocks",
+ },
+ 'expected_warnings': ['Unable to download JSON metadata'],
+ }, {
+ 'note': 'video player needs Referer',
+ 'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
+ 'md5': '6295fdab8f4bf6a002d058b2c6dce276',
+ 'info_dict': {
+ 'id': '91613211',
+ 'ext': 'mp4',
+ 'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
+ 'uploader': 'DevWeek Events',
+ 'duration': 2773,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader_id': 'user22258446',
+ },
+ 'skip': 'video gone',
+ }, {
+ 'note': 'Password protected',
+ 'url': 'https://vimeo.com/user37284429/review/138823582/c4d865efde',
+ 'info_dict': {
+ 'id': '138823582',
+ 'ext': 'mp4',
+ 'title': 'EFFICIENT PICKUP MASTERCLASS MODULE 1',
+ 'uploader': 'TMB',
+ 'uploader_id': 'user37284429',
+ },
+ 'params': {
+ 'videopassword': 'holygrail',
+ },
+ 'skip': 'video gone',
+ }]
+
+ def _real_initialize(self):
+ self._login()
+
+ def _real_extract(self, url):
+ page_url, video_id = re.match(self._VALID_URL, url).groups()
+ clip_data = self._download_json(
+ page_url.replace('/review/', '/review/data/'),
+ video_id)['clipData']
+ config_url = clip_data['configUrl']
+ config = self._download_json(config_url, video_id)
+ info_dict = self._parse_config(config, video_id)
+ source_format = self._extract_original_format(
+ page_url + '/action', video_id)
+ if source_format:
+ info_dict['formats'].append(source_format)
+ self._vimeo_sort_formats(info_dict['formats'])
+ info_dict['description'] = clean_html(clip_data.get('description'))
+ return info_dict
+
+
+class VimeoWatchLaterIE(VimeoChannelIE):
+ IE_NAME = 'vimeo:watchlater'
+ IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
+ _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
+ _TITLE = 'Watch Later'
+ _LOGIN_REQUIRED = True
+ _TESTS = [{
+ 'url': 'https://vimeo.com/watchlater',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ self._login()
+
+ def _page_url(self, base_url, pagenum):
+ url = '%s/page:%d/' % (base_url, pagenum)
+ request = sanitized_Request(url)
+ # Set the header to get a partial HTML page containing the ids;
+ # the normal page doesn't contain them.
+ request.add_header('X-Requested-With', 'XMLHttpRequest')
+ return request
+
+ def _real_extract(self, url):
+ return self._extract_videos('watchlater', 'https://vimeo.com/watchlater')
+
+
+class VimeoLikesIE(VimeoChannelIE):
+ _VALID_URL = r'https://(?:www\.)?vimeo\.com/(?P<id>[^/]+)/likes/?(?:$|[?#]|sort:)'
+ IE_NAME = 'vimeo:likes'
+ IE_DESC = 'Vimeo user likes'
+ _TESTS = [{
+ 'url': 'https://vimeo.com/user755559/likes/',
+ 'playlist_mincount': 293,
+ 'info_dict': {
+ 'id': 'user755559',
+ 'title': 'urza’s Likes',
+ },
+ }, {
+ 'url': 'https://vimeo.com/stormlapse/likes',
+ 'only_matching': True,
+ }]
+
+ def _page_url(self, base_url, pagenum):
+ return '%s/page:%d/' % (base_url, pagenum)
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ return self._extract_videos(user_id, 'https://vimeo.com/%s/likes' % user_id)
+
+
+class VHXEmbedIE(VimeoBaseInfoExtractor):
+ IE_NAME = 'vhx:embed'
+ _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+ config_url = self._parse_json(self._search_regex(
+ r'window\.OTTData\s*=\s*({.+})', webpage,
+ 'ott data'), video_id, js_to_json)['config_url']
+ config = self._download_json(config_url, video_id)
+ info = self._parse_config(config, video_id)
+ self._vimeo_sort_formats(info['formats'])
+ return info
diff --git a/youtube_dl/extractor/vimple.py b/youtube_dlc/extractor/vimple.py
index c74b43766..c74b43766 100644
--- a/youtube_dl/extractor/vimple.py
+++ b/youtube_dlc/extractor/vimple.py
diff --git a/youtube_dl/extractor/vine.py b/youtube_dlc/extractor/vine.py
index 80b896b56..80b896b56 100644
--- a/youtube_dl/extractor/vine.py
+++ b/youtube_dlc/extractor/vine.py
diff --git a/youtube_dl/extractor/viqeo.py b/youtube_dlc/extractor/viqeo.py
index be7dfa814..be7dfa814 100644
--- a/youtube_dl/extractor/viqeo.py
+++ b/youtube_dlc/extractor/viqeo.py
diff --git a/youtube_dl/extractor/viu.py b/youtube_dlc/extractor/viu.py
index 3bd37525b..3bd37525b 100644
--- a/youtube_dl/extractor/viu.py
+++ b/youtube_dlc/extractor/viu.py
diff --git a/youtube_dlc/extractor/vk.py b/youtube_dlc/extractor/vk.py
new file mode 100644
index 000000000..00ec006c4
--- /dev/null
+++ b/youtube_dlc/extractor/vk.py
@@ -0,0 +1,678 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import collections
+import functools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_urlparse
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ get_element_by_class,
+ int_or_none,
+ OnDemandPagedList,
+ orderedSet,
+ str_or_none,
+ str_to_int,
+ unescapeHTML,
+ unified_timestamp,
+ url_or_none,
+ urlencode_postdata,
+)
+from .dailymotion import DailymotionIE
+from .odnoklassniki import OdnoklassnikiIE
+from .pladform import PladformIE
+from .vimeo import VimeoIE
+from .youtube import YoutubeIE
+
+
+class VKBaseIE(InfoExtractor):
+ _NETRC_MACHINE = 'vk'
+
+ def _login(self):
+ username, password = self._get_login_info()
+ if username is None:
+ return
+
+ login_page, url_handle = self._download_webpage_handle(
+ 'https://vk.com', None, 'Downloading login page')
+
+ login_form = self._hidden_inputs(login_page)
+
+ login_form.update({
+ 'email': username.encode('cp1251'),
+ 'pass': password.encode('cp1251'),
+ })
+
+ # VK serves two identical remixlhk cookies in the Set-Cookie header and
+ # expects the first one to actually be set
+ self._apply_first_set_cookie_header(url_handle, 'remixlhk')
+
+ login_page = self._download_webpage(
+ 'https://login.vk.com/?act=login', None,
+ note='Logging in',
+ data=urlencode_postdata(login_form))
+
+ if re.search(r'onLoginFailed', login_page):
+ raise ExtractorError(
+ 'Unable to log in, incorrect username and/or password', expected=True)
+
+ def _real_initialize(self):
+ self._login()
+
+ def _download_payload(self, path, video_id, data, fatal=True):
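+ # The JSON response's 'payload' field is a list of a status code
+ # followed by the data; code '3' seems to mean login is required and
+ # '8' to carry an error message.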
+ data['al'] = 1
+ code, payload = self._download_json(
+ 'https://vk.com/%s.php' % path, video_id,
+ data=urlencode_postdata(data), fatal=fatal,
+ headers={'X-Requested-With': 'XMLHttpRequest'})['payload']
+ if code == '3':
+ self.raise_login_required()
+ elif code == '8':
+ raise ExtractorError(clean_html(payload[0][1:-1]), expected=True)
+ return payload
+
+
+class VKIE(VKBaseIE):
+ IE_NAME = 'vk'
+ IE_DESC = 'VK'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:
+ (?:
+ (?:(?:m|new)\.)?vk\.com/video_|
+ (?:www\.)?daxab.com/
+ )
+ ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)|
+ (?:
+ (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video|
+ (?:www\.)?daxab.com/embed/
+ )
+ (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))?
+ )
+ '''
+ _TESTS = [
+ {
+ 'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521',
+ 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
+ 'info_dict': {
+ 'id': '-77521_162222515',
+ 'ext': 'mp4',
+ 'title': 'ProtivoGunz - Хуёвая песня',
+ 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
+ 'uploader_id': '-77521',
+ 'duration': 195,
+ 'timestamp': 1329049880,
+ 'upload_date': '20120212',
+ },
+ },
+ {
+ 'url': 'http://vk.com/video205387401_165548505',
+ 'info_dict': {
+ 'id': '205387401_165548505',
+ 'ext': 'mp4',
+ 'title': 'No name',
+ 'uploader': 'Tom Cruise',
+ 'uploader_id': '205387401',
+ 'duration': 9,
+ 'timestamp': 1374364108,
+ 'upload_date': '20130720',
+ }
+ },
+ {
+ 'note': 'Embedded video',
+ 'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa',
+ 'md5': '7babad3b85ea2e91948005b1b8b0cb84',
+ 'info_dict': {
+ 'id': '-77521_162222515',
+ 'ext': 'mp4',
+ 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*',
+ 'title': 'ProtivoGunz - Хуёвая песня',
+ 'duration': 195,
+ 'upload_date': '20120212',
+ 'timestamp': 1329049880,
+ 'uploader_id': '-77521',
+ },
+ },
+ {
+ # VIDEO NOW REMOVED
+ # please update if you find a video whose URL follows the same pattern
+ 'url': 'http://vk.com/video-8871596_164049491',
+ 'md5': 'a590bcaf3d543576c9bd162812387666',
+ 'note': 'Only available for registered users',
+ 'info_dict': {
+ 'id': '-8871596_164049491',
+ 'ext': 'mp4',
+ 'uploader': 'Триллеры',
+ 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]',
+ 'duration': 8352,
+ 'upload_date': '20121218',
+ 'view_count': int,
+ },
+ 'skip': 'Removed',
+ },
+ {
+ 'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d',
+ 'info_dict': {
+ 'id': '-43215063_168067957',
+ 'ext': 'mp4',
+ 'uploader': 'Bro Mazter',
+ 'title': ' ',
+ 'duration': 7291,
+ 'upload_date': '20140328',
+ 'uploader_id': '223413403',
+ 'timestamp': 1396018030,
+ },
+ 'skip': 'Requires vk account credentials',
+ },
+ {
+ 'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540',
+ 'md5': '0c45586baa71b7cb1d0784ee3f4e00a6',
+ 'note': 'ivi.ru embed',
+ 'info_dict': {
+ 'id': '-43215063_169084319',
+ 'ext': 'mp4',
+ 'title': 'Книга Илая',
+ 'duration': 6771,
+ 'upload_date': '20140626',
+ 'view_count': int,
+ },
+ 'skip': 'Removed',
+ },
+ {
+ # video (removed?) only available with list id
+ 'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4',
+ 'md5': '091287af5402239a1051c37ec7b92913',
+ 'info_dict': {
+ 'id': '30481095_171201961',
+ 'ext': 'mp4',
+ 'title': 'ТюменцевВВ_09.07.2015',
+ 'uploader': 'Anton Ivanov',
+ 'duration': 109,
+ 'upload_date': '20150709',
+ 'view_count': int,
+ },
+ 'skip': 'Removed',
+ },
+ {
+ # youtube embed
+ 'url': 'https://vk.com/video276849682_170681728',
+ 'info_dict': {
+ 'id': 'V3K4mi0SYkc',
+ 'ext': 'mp4',
+ 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate",
+ 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a',
+ 'duration': 178,
+ 'upload_date': '20130116',
+ 'uploader': "Children's Joy Foundation Inc.",
+ 'uploader_id': 'thecjf',
+ 'view_count': int,
+ },
+ },
+ {
+ # dailymotion embed
+ 'url': 'https://vk.com/video-37468416_456239855',
+ 'info_dict': {
+ 'id': 'k3lz2cmXyRuJQSjGHUv',
+ 'ext': 'mp4',
+ 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f',
+ 'description': 'md5:424b8e88cc873217f520e582ba28bb36',
+ 'uploader': 'AniLibria.Tv',
+ 'upload_date': '20160914',
+ 'uploader_id': 'x1p5vl5',
+ 'timestamp': 1473877246,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # video key is extra_data not url\d+
+ 'url': 'http://vk.com/video-110305615_171782105',
+ 'md5': 'e13fcda136f99764872e739d13fac1d1',
+ 'info_dict': {
+ 'id': '-110305615_171782105',
+ 'ext': 'mp4',
+ 'title': 'S-Dance, репетиции к The way show',
+ 'uploader': 'THE WAY SHOW | 17 апреля',
+ 'uploader_id': '-110305615',
+ 'timestamp': 1454859345,
+ 'upload_date': '20160207',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # finished live stream, postlive_mp4
+ 'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2',
+ 'info_dict': {
+ 'id': '-387766_456242764',
+ 'ext': 'mp4',
+ 'title': 'ИгроМир 2016 День 1 — Игромания Утром',
+ 'uploader': 'Игромания',
+ 'duration': 5239,
+ # TODO: use act=show to extract view_count
+ # 'view_count': int,
+ 'upload_date': '20160929',
+ 'uploader_id': '-387766',
+ 'timestamp': 1475137527,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # live stream, hls and rtmp links, most likely already finished live
+ # stream by the time you are reading this comment
+ 'url': 'https://vk.com/video-140332_456239111',
+ 'only_matching': True,
+ },
+ {
+ # removed video, just testing that we match the pattern
+ 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a',
+ 'only_matching': True,
+ },
+ {
+ # age restricted video, requires vk account credentials
+ 'url': 'https://vk.com/video205387401_164765225',
+ 'only_matching': True,
+ },
+ {
+ # pladform embed
+ 'url': 'https://vk.com/video-76116461_171554880',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://new.vk.com/video205387401_165548505',
+ 'only_matching': True,
+ },
+ {
+ # This video is no longer available, because its author has been blocked.
+ 'url': 'https://vk.com/video-10639516_456240611',
+ 'only_matching': True,
+ },
+ {
+ # The video is not available in your region.
+ 'url': 'https://vk.com/video-51812607_171445436',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('videoid')
+
+ mv_data = {}
+ if video_id:
+ data = {
+ 'act': 'show_inline',
+ 'video': video_id,
+ }
+ # Some videos (removed?) can only be downloaded with list id specified
+ list_id = mobj.group('list_id')
+ if list_id:
+ data['list'] = list_id
+
+ payload = self._download_payload('al_video', video_id, data)
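+ # payload[1] holds the rendered info page HTML and the last element
+ # the player options (mvData/player).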
+ info_page = payload[1]
+ opts = payload[-1]
+ mv_data = opts.get('mvData') or {}
+ player = opts.get('player') or {}
+ else:
+ video_id = '%s_%s' % (mobj.group('oid'), mobj.group('id'))
+
+ info_page = self._download_webpage(
+ 'http://vk.com/video_ext.php?' + mobj.group('embed_query'), video_id)
+
+ error_message = self._html_search_regex(
+ [r'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>',
+ r'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'],
+ info_page, 'error message', default=None)
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+
+ if re.search(r'<!>/login\.php\?.*\bact=security_check', info_page):
+ raise ExtractorError(
+ 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.',
+ expected=True)
+
+ ERROR_COPYRIGHT = 'Video %s has been removed from public access due to rightholder complaint.'
+
+ ERRORS = {
+ r'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<':
+ ERROR_COPYRIGHT,
+
+ r'>The video .*? was removed from public access by request of the copyright holder.<':
+ ERROR_COPYRIGHT,
+
+ r'<!>Please log in or <':
+ 'Video %s is only available for registered users, '
+ 'use --username and --password options to provide account credentials.',
+
+ r'<!>Unknown error':
+ 'Video %s does not exist.',
+
+ r'<!>Видео временно недоступно':
+ 'Video %s is temporarily unavailable.',
+
+ r'<!>Access denied':
+ 'Access denied to video %s.',
+
+ r'<!>Видеозапись недоступна, так как её автор был заблокирован.':
+ 'Video %s is no longer available, because its author has been blocked.',
+
+ r'<!>This video is no longer available, because its author has been blocked.':
+ 'Video %s is no longer available, because its author has been blocked.',
+
+ r'<!>This video is no longer available, because it has been deleted.':
+ 'Video %s is no longer available, because it has been deleted.',
+
+ r'<!>The video .+? is not available in your region.':
+ 'Video %s is not available in your region.',
+ }
+
+ for error_re, error_msg in ERRORS.items():
+ if re.search(error_re, info_page):
+ raise ExtractorError(error_msg % video_id, expected=True)
+
+ player = self._parse_json(self._search_regex(
+ r'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n',
+ info_page, 'player params'), video_id)
+
+ youtube_url = YoutubeIE._extract_url(info_page)
+ if youtube_url:
+ return self.url_result(youtube_url, YoutubeIE.ie_key())
+
+ vimeo_url = VimeoIE._extract_url(url, info_page)
+ if vimeo_url is not None:
+ return self.url_result(vimeo_url, VimeoIE.ie_key())
+
+ pladform_url = PladformIE._extract_url(info_page)
+ if pladform_url:
+ return self.url_result(pladform_url, PladformIE.ie_key())
+
+ m_rutube = re.search(
+ r'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page)
+ if m_rutube is not None:
+ rutube_url = self._proto_relative_url(
+ m_rutube.group(1).replace('\\', ''))
+ return self.url_result(rutube_url)
+
+ dailymotion_urls = DailymotionIE._extract_urls(info_page)
+ if dailymotion_urls:
+ return self.url_result(dailymotion_urls[0], DailymotionIE.ie_key())
+
+ odnoklassniki_url = OdnoklassnikiIE._extract_url(info_page)
+ if odnoklassniki_url:
+ return self.url_result(odnoklassniki_url, OdnoklassnikiIE.ie_key())
+
+ m_opts = re.search(r'(?s)var\s+opts\s*=\s*({.+?});', info_page)
+ if m_opts:
+ m_opts_url = re.search(r"url\s*:\s*'((?!/\b)[^']+)", m_opts.group(1))
+ if m_opts_url:
+ opts_url = m_opts_url.group(1)
+ if opts_url.startswith('//'):
+ opts_url = 'http:' + opts_url
+ return self.url_result(opts_url)
+
+ data = player['params'][0]
+ title = unescapeHTML(data['md_title'])
+
+ # 2 = live
+ # 3 = post live (finished live)
+ is_live = data.get('live') == 2
+ if is_live:
+ title = self._live_title(title)
+
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'class=["\']mv_info_date[^>]+>([^<]+)(?:<|from)', info_page,
+ 'upload date', default=None)) or int_or_none(data.get('date'))
+
+ view_count = str_to_int(self._search_regex(
+ r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)',
+ info_page, 'view count', default=None))
+
+ formats = []
+ for format_id, format_url in data.items():
+ format_url = url_or_none(format_url)
+ if not format_url or not format_url.startswith(('http', '//', 'rtmp')):
+ continue
+ if (format_id.startswith(('url', 'cache'))
+ or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')):
+ height = int_or_none(self._search_regex(
+ r'^(?:url|cache)(\d+)', format_id, 'height', default=None))
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'height': height,
+ })
+ elif format_id == 'hls':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', 'm3u8_native',
+ m3u8_id=format_id, fatal=False, live=is_live))
+ elif format_id == 'rtmp':
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'ext': 'flv',
+ })
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ 'title': title,
+ 'thumbnail': data.get('jpg'),
+ 'uploader': data.get('md_author'),
+ 'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')),
+ 'duration': int_or_none(data.get('duration') or mv_data.get('duration')),
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'like_count': int_or_none(mv_data.get('likes')),
+ 'comment_count': int_or_none(mv_data.get('commcount')),
+ 'is_live': is_live,
+ }
+
+
+class VKUserVideosIE(VKBaseIE):
+ IE_NAME = 'vk:uservideos'
+ IE_DESC = "VK - User's Videos"
+ _VALID_URL = r'https?://(?:(?:m|new)\.)?vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&](?:.*?\bsection=(?P<section>\w+))?|$)'
+ _TEMPLATE_URL = 'https://vk.com/videos'
+ _TESTS = [{
+ 'url': 'https://vk.com/videos-767561',
+ 'info_dict': {
+ 'id': '-767561_all',
+ },
+ 'playlist_mincount': 1150,
+ }, {
+ 'url': 'https://vk.com/videos-767561?section=uploaded',
+ 'info_dict': {
+ 'id': '-767561_uploaded',
+ },
+ 'playlist_mincount': 425,
+ }, {
+ 'url': 'http://vk.com/videos205387401',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vk.com/videos-77521',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://vk.com/videos-97664626?section=all',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://m.vk.com/videos205387401',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://new.vk.com/videos205387401',
+ 'only_matching': True,
+ }]
+ _PAGE_SIZE = 1000
+ _VIDEO = collections.namedtuple('Video', ['owner_id', 'id'])
+
+ def _fetch_page(self, page_id, section, page):
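+ # Each entry in the returned list is a positional array; only the
+ # first two fields (owner id and video id) are needed to build the
+ # canonical video URL.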
+ video_list = self._download_payload('al_video', page_id, {
+ 'act': 'load_videos_silent',
+ 'offset': page * self._PAGE_SIZE,
+ 'oid': page_id,
+ 'section': section,
+ })[0][section]['list']
+
+ for video in video_list:
+ v = self._VIDEO._make(video[:2])
+ video_id = '%d_%d' % (v.owner_id, v.id)
+ yield self.url_result(
+ 'http://vk.com/video' + video_id, VKIE.ie_key(), video_id)
+
+ def _real_extract(self, url):
+ page_id, section = re.match(self._VALID_URL, url).groups()
+ if not section:
+ section = 'all'
+
+ entries = OnDemandPagedList(
+ functools.partial(self._fetch_page, page_id, section),
+ self._PAGE_SIZE)
+
+ return self.playlist_result(entries, '%s_%s' % (page_id, section))
+
+
+class VKWallPostIE(VKBaseIE):
+ IE_NAME = 'vk:wallpost'
+ _VALID_URL = r'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))'
+ _TESTS = [{
+ # public page URL, audio playlist
+ 'url': 'https://vk.com/bs.official?w=wall-23538238_35',
+ 'info_dict': {
+ 'id': '-23538238_35',
+ 'title': 'Black Shadow - Wall post -23538238_35',
+ 'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c',
+ },
+ 'playlist': [{
+ 'md5': '5ba93864ec5b85f7ce19a9af4af080f6',
+ 'info_dict': {
+ 'id': '135220665_111806521',
+ 'ext': 'mp4',
+ 'title': 'Black Shadow - Слепое Верование',
+ 'duration': 370,
+ 'uploader': 'Black Shadow',
+ 'artist': 'Black Shadow',
+ 'track': 'Слепое Верование',
+ },
+ }, {
+ 'md5': '4cc7e804579122b17ea95af7834c9233',
+ 'info_dict': {
+ 'id': '135220665_111802303',
+ 'ext': 'mp4',
+ 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!',
+ 'duration': 423,
+ 'uploader': 'Black Shadow',
+ 'artist': 'Black Shadow',
+ 'track': 'Война - Негасимое Бездны Пламя!',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ 'usenetrc': True,
+ },
+ 'skip': 'Requires vk account credentials',
+ }, {
+ # single YouTube embed, no leading -
+ 'url': 'https://vk.com/wall85155021_6319',
+ 'info_dict': {
+ 'id': '85155021_6319',
+ 'title': 'Сергей Горбунов - Wall post 85155021_6319',
+ },
+ 'playlist_count': 1,
+ 'params': {
+ 'usenetrc': True,
+ },
+ 'skip': 'Requires vk account credentials',
+ }, {
+ # wall page URL
+ 'url': 'https://vk.com/wall-23538238_35',
+ 'only_matching': True,
+ }, {
+ # mobile wall page URL
+ 'url': 'https://m.vk.com/wall-23538238_35',
+ 'only_matching': True,
+ }]
+ _BASE64_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/='
+ _AUDIO = collections.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads'])
+
+ def _decode(self, enc):
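+ # Base64 decoder over VK's shuffled alphabet (note the swapped O/0 in
+ # _BASE64_CHARS): accumulate 6 bits per input character and emit one
+ # byte for every position in a 4-character group except the first.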
+ dec = ''
+ e = n = 0
+ for c in enc:
+ r = self._BASE64_CHARS.index(c)
+ cond = n % 4
+ e = 64 * e + r if cond else r
+ n += 1
+ if cond:
+ dec += chr(255 & e >> (-2 * n & 6))
+ return dec
+
+ def _unmask_url(self, mask_url, vk_id):
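+ # Masked audio URLs carry two base64 blobs after '?extra=': the second
+ # decodes to a (func, base) pair, the first to the masked URL, whose
+ # characters are swapped back using an index sequence seeded by
+ # int(base) XOR vk_id.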
+ if 'audio_api_unavailable' in mask_url:
+ extra = mask_url.split('?extra=')[1].split('#')
+ func, base = self._decode(extra[1]).split(chr(11))
+ mask_url = list(self._decode(extra[0]))
+ url_len = len(mask_url)
+ indexes = [None] * url_len
+ index = int(base) ^ vk_id
+ for n in range(url_len - 1, -1, -1):
+ index = (url_len * (n + 1) ^ index + n) % url_len
+ indexes[n] = index
+ for n in range(1, url_len):
+ c = mask_url[n]
+ index = indexes[url_len - 1 - n]
+ mask_url[n] = mask_url[index]
+ mask_url[index] = c
+ mask_url = ''.join(mask_url)
+ return mask_url
+
+ def _real_extract(self, url):
+ post_id = self._match_id(url)
+
+ webpage = self._download_payload('wkview', post_id, {
+ 'act': 'show',
+ 'w': 'wall' + post_id,
+ })[1]
+
+ description = clean_html(get_element_by_class('wall_post_text', webpage))
+ uploader = clean_html(get_element_by_class('author', webpage))
+
+ entries = []
+
+ for audio in re.findall(r'data-audio="([^"]+)', webpage):
+ audio = self._parse_json(unescapeHTML(audio), post_id)
+ a = self._AUDIO._make(audio[:16])
+ if not a.url:
+ continue
+ title = unescapeHTML(a.title)
+ performer = unescapeHTML(a.performer)
+ entries.append({
+ 'id': '%s_%s' % (a.owner_id, a.id),
+ 'url': self._unmask_url(a.url, a.ads['vk_id']),
+ 'title': '%s - %s' % (performer, title) if performer else title,
+ 'thumbnails': [{'url': c_url} for c_url in a.cover_url.split(',')] if a.cover_url else None,
+ 'duration': int_or_none(a.duration),
+ 'uploader': uploader,
+ 'artist': performer,
+ 'track': title,
+ 'ext': 'mp4',
+ 'protocol': 'm3u8',
+ })
+
+ for video in re.finditer(
+ r'<a[^>]+href=(["\'])(?P<url>/video(?:-?[\d_]+).*?)\1', webpage):
+ entries.append(self.url_result(
+ compat_urlparse.urljoin(url, video.group('url')), VKIE.ie_key()))
+
+ title = 'Wall post %s' % post_id
+
+ return self.playlist_result(
+ orderedSet(entries), post_id,
+ '%s - %s' % (uploader, title) if uploader else title,
+ description)
diff --git a/youtube_dlc/extractor/vlive.py b/youtube_dlc/extractor/vlive.py
new file mode 100644
index 000000000..f79531e6f
--- /dev/null
+++ b/youtube_dlc/extractor/vlive.py
@@ -0,0 +1,367 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import time
+import itertools
+
+from .common import InfoExtractor
+from .naver import NaverBaseIE
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ merge_dicts,
+ remove_start,
+ try_get,
+ urlencode_postdata,
+)
+
+
+class VLiveIE(NaverBaseIE):
+ IE_NAME = 'vlive'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
+ _NETRC_MACHINE = 'vlive'
+ _TESTS = [{
+ 'url': 'http://www.vlive.tv/video/1326',
+ 'md5': 'cc7314812855ce56de70a06a27314983',
+ 'info_dict': {
+ 'id': '1326',
+ 'ext': 'mp4',
+ 'title': "[V LIVE] Girl's Day's Broadcast",
+ 'creator': "Girl's Day",
+ 'view_count': int,
+ 'uploader_id': 'muploader_a',
+ },
+ }, {
+ 'url': 'http://www.vlive.tv/video/16937',
+ 'info_dict': {
+ 'id': '16937',
+ 'ext': 'mp4',
+ 'title': '[V LIVE] 첸백시 걍방',
+ 'creator': 'EXO',
+ 'view_count': int,
+ 'subtitles': 'mincount:12',
+ 'uploader_id': 'muploader_j',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.vlive.tv/video/129100',
+ 'md5': 'ca2569453b79d66e5b919e5d308bff6b',
+ 'info_dict': {
+ 'id': '129100',
+ 'ext': 'mp4',
+ 'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
+ 'creator': 'BTS+',
+ 'view_count': int,
+ 'subtitles': 'mincount:10',
+ },
+ 'skip': 'This video is only available for CH+ subscribers',
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
+
+ def _real_initialize(self):
+ self._login()
+
+ def _login(self):
+ email, password = self._get_login_info()
+ if None in (email, password):
+ return
+
+ def is_logged_in():
+ login_info = self._download_json(
+ 'https://www.vlive.tv/auth/loginInfo', None,
+ note='Downloading login info',
+ headers={'Referer': 'https://www.vlive.tv/home'})
+ return try_get(
+ login_info, lambda x: x['message']['login'], bool) or False
+
+ LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+ self._request_webpage(
+ LOGIN_URL, None, note='Downloading login cookies')
+
+ self._download_webpage(
+ LOGIN_URL, None, note='Logging in',
+ data=urlencode_postdata({'email': email, 'pwd': password}),
+ headers={
+ 'Referer': LOGIN_URL,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+
+ if not is_logged_in():
+ raise ExtractorError('Unable to log in', expected=True)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'https://www.vlive.tv/video/%s' % video_id, video_id)
+
+ VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
+ VIDEO_PARAMS_FIELD = 'video params'
+
+ params = self._parse_json(self._search_regex(
+ VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
+ transform_source=lambda s: '[' + s + ']', fatal=False)
+
+ if not params or len(params) < 7:
+ params = self._search_regex(
+ VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
+ params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]
+
+ status, long_video_id, key = params[2], params[5], params[6]
+ status = remove_start(status, 'PRODUCT_')
+
+ if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
+ return self._live(video_id, webpage)
+ elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
+ return self._replay(video_id, webpage, long_video_id, key)
+
+ if status == 'LIVE_END':
+ raise ExtractorError('Uploading for replay. Please wait...',
+ expected=True)
+ elif status == 'COMING_SOON':
+ raise ExtractorError('Coming soon!', expected=True)
+ elif status == 'CANCELED':
+ raise ExtractorError('We are sorry, '
+ 'but the live broadcast has been canceled.',
+ expected=True)
+ elif status == 'ONLY_APP':
+ raise ExtractorError('Unsupported video type', expected=True)
+ else:
+ raise ExtractorError('Unknown status %s' % status)
+
+ def _get_common_fields(self, webpage):
+ title = self._og_search_title(webpage)
+ creator = self._html_search_regex(
+ r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*?</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
+ webpage, 'creator', fatal=False)
+ thumbnail = self._og_search_thumbnail(webpage)
+ return {
+ 'title': title,
+ 'creator': creator,
+ 'thumbnail': thumbnail,
+ }
+
+ def _live(self, video_id, webpage):
+ init_page = self._download_init_page(video_id)
+
+ live_params = self._search_regex(
+ r'"liveStreamInfo"\s*:\s*(".*"),',
+ init_page, 'live stream info')
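+ # liveStreamInfo is a JSON object serialized into a JSON string,
+ # hence the double parse: once to unquote, once for the object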
+ live_params = self._parse_json(live_params, video_id)
+ live_params = self._parse_json(live_params, video_id)
+
+ formats = []
+ for vid in live_params.get('resolutions', []):
+ formats.extend(self._extract_m3u8_formats(
+ vid['cdnUrl'], video_id, 'mp4',
+ m3u8_id=vid.get('name'),
+ fatal=False, live=True))
+ self._sort_formats(formats)
+
+ info = self._get_common_fields(webpage)
+ info.update({
+ 'title': self._live_title(info['title']),
+ 'id': video_id,
+ 'formats': formats,
+ 'is_live': True,
+ })
+ return info
+
+ def _replay(self, video_id, webpage, long_video_id, key):
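+ # long_video_id/key may come through empty from the watch page; in
+ # that case the init view supplies them and also reveals CH+ (paid)
+ # restrictions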
+ if '' in (long_video_id, key):
+ init_page = self._download_init_page(video_id)
+ video_info = self._parse_json(self._search_regex(
+ (r'(?s)oVideoStatus\s*=\s*({.+?})\s*</script',
+ r'(?s)oVideoStatus\s*=\s*({.+})'), init_page, 'video info'),
+ video_id)
+ if video_info.get('status') == 'NEED_CHANNEL_PLUS':
+ self.raise_login_required(
+ 'This video is only available for CH+ subscribers')
+ long_video_id, key = video_info['vid'], video_info['inkey']
+
+ return merge_dicts(
+ self._get_common_fields(webpage),
+ self._extract_video_info(video_id, long_video_id, key))
+
+ def _download_init_page(self, video_id):
+ return self._download_webpage(
+ 'https://www.vlive.tv/video/init/view',
+ video_id, note='Downloading live webpage',
+ data=urlencode_postdata({'videoSeq': video_id}),
+ headers={
+ 'Referer': 'https://www.vlive.tv/video/%s' % video_id,
+ 'Content-Type': 'application/x-www-form-urlencoded'
+ })
+
+
+class VLiveChannelIE(InfoExtractor):
+ IE_NAME = 'vlive:channel'
+ _VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
+ _TEST = {
+ 'url': 'http://channels.vlive.tv/FCD4B',
+ 'info_dict': {
+ 'id': 'FCD4B',
+ 'title': 'MAMAMOO',
+ },
+ 'playlist_mincount': 110
+ }
+ _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+
+ def _real_extract(self, url):
+ channel_code = self._match_id(url)
+
+ webpage = self._download_webpage(
+ 'http://channels.vlive.tv/%s/video' % channel_code, channel_code)
+
+ app_id = None
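+ # Prefer the VFAN_APP_ID scraped from the channel page's app.js;
+ # the hardcoded _APP_ID is only a fallback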
+
+ app_js_url = self._search_regex(
+ r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
+ webpage, 'app js', default=None, group='url')
+
+ if app_js_url:
+ app_js = self._download_webpage(
+ app_js_url, channel_code, 'Downloading app JS', fatal=False)
+ if app_js:
+ app_id = self._search_regex(
+ r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
+ app_js, 'app id', default=None)
+
+ app_id = app_id or self._APP_ID
+
+ channel_info = self._download_json(
+ 'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
+ channel_code, note='Downloading decoded channel code',
+ query={
+ 'app_id': app_id,
+ 'channelCode': channel_code,
+ '_': int(time.time())
+ })
+
+ channel_seq = channel_info['result']['channelSeq']
+ channel_name = None
+ entries = []
+
+ for page_num in itertools.count(1):
+ video_list = self._download_json(
+ 'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
+ channel_code, note='Downloading channel list page #%d' % page_num,
+ query={
+ 'app_id': app_id,
+ 'channelSeq': channel_seq,
+ # Large values of maxNumOfRows (~300 or above) may cause
+ # empty responses (see [1]), e.g. this happens for [2] that
+ # has more than 300 videos.
+ # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
+ # 2. http://channels.vlive.tv/EDBF.
+ 'maxNumOfRows': 100,
+ '_': int(time.time()),
+ 'pageNo': page_num
+ }
+ )
+
+ if not channel_name:
+ channel_name = try_get(
+ video_list,
+ lambda x: x['result']['channelInfo']['channelName'],
+ compat_str)
+
+ videos = try_get(
+ video_list, lambda x: x['result']['videoList'], list)
+ if not videos:
+ break
+
+ for video in videos:
+ video_id = video.get('videoSeq')
+ if not video_id:
+ continue
+ video_id = compat_str(video_id)
+ entries.append(
+ self.url_result(
+ 'http://www.vlive.tv/video/%s' % video_id,
+ ie=VLiveIE.ie_key(), video_id=video_id))
+
+ return self.playlist_result(
+ entries, channel_code, channel_name)
+
+
+class VLivePlaylistIE(InfoExtractor):
+ IE_NAME = 'vlive:playlist'
+ _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
+ _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
+ _TESTS = [{
+ # regular working playlist
+ 'url': 'https://www.vlive.tv/video/117956/playlist/117963',
+ 'info_dict': {
+ 'id': '117963',
+ 'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
+ },
+ 'playlist_mincount': 10
+ }, {
+ # playlist with no playlistVideoSeqs
+ 'url': 'http://www.vlive.tv/video/22867/playlist/22912',
+ 'info_dict': {
+ 'id': '22867',
+ 'ext': 'mp4',
+ 'title': '[V LIVE] Valentine Day Message from MINA',
+ 'creator': 'TWICE',
+ 'view_count': int
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ }]
+
+ def _build_video_result(self, video_id, message):
+ self.to_screen(message)
+ return self.url_result(
+ self._VIDEO_URL_TEMPLATE % video_id,
+ ie=VLiveIE.ie_key(), video_id=video_id)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id, playlist_id = mobj.group('video_id', 'id')
+
+ if self._downloader.params.get('noplaylist'):
+ return self._build_video_result(
+ video_id,
+ 'Downloading just video %s because of --no-playlist'
+ % video_id)
+
+ self.to_screen(
+ 'Downloading playlist %s - add --no-playlist to just download video'
+ % playlist_id)
+
+ webpage = self._download_webpage(
+ 'http://www.vlive.tv/video/%s/playlist/%s'
+ % (video_id, playlist_id), playlist_id)
+
+ raw_item_ids = self._search_regex(
+ r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
+ 'playlist video seqs', default=None, fatal=False)
+
+ if not raw_item_ids:
+ return self._build_video_result(
+ video_id,
+ 'Downloading just video %s because no playlist was found'
+ % video_id)
+
+ item_ids = self._parse_json(raw_item_ids, playlist_id)
+
+ entries = [
+ self.url_result(
+ self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
+ video_id=compat_str(item_id))
+ for item_id in item_ids]
+
+ playlist_name = self._html_search_regex(
+ r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
+ webpage, 'playlist title', fatal=False)
+
+ return self.playlist_result(entries, playlist_id, playlist_name)
diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dlc/extractor/vodlocker.py
index 02c9617d2..02c9617d2 100644
--- a/youtube_dl/extractor/vodlocker.py
+++ b/youtube_dlc/extractor/vodlocker.py
diff --git a/youtube_dl/extractor/vodpl.py b/youtube_dlc/extractor/vodpl.py
index 9e919708e..9e919708e 100644
--- a/youtube_dl/extractor/vodpl.py
+++ b/youtube_dlc/extractor/vodpl.py
diff --git a/youtube_dlc/extractor/vodplatform.py b/youtube_dlc/extractor/vodplatform.py
new file mode 100644
index 000000000..74d2257e7
--- /dev/null
+++ b/youtube_dlc/extractor/vodplatform.py
@@ -0,0 +1,40 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..utils import unescapeHTML
+
+
+class VODPlatformIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www\.)?vod-platform\.net|embed\.kwikmotion\.com)/[eE]mbed/(?P<id>[^/?#]+)'
+ _TESTS = [{
+ # from http://www.lbcgroup.tv/watch/chapter/29143/52844/%D8%A7%D9%84%D9%86%D8%B5%D8%B1%D8%A9-%D9%81%D9%8A-%D8%B6%D9%8A%D8%A7%D9%81%D8%A9-%D8%A7%D9%84%D9%80-cnn/ar
+ 'url': 'http://vod-platform.net/embed/RufMcytHDolTH1MuKHY9Fw',
+ 'md5': '1db2b7249ce383d6be96499006e951fc',
+ 'info_dict': {
+ 'id': 'RufMcytHDolTH1MuKHY9Fw',
+ 'ext': 'mp4',
+ 'title': 'LBCi News_ النصرة في ضيافة الـ "سي.أن.أن"',
+ }
+ }, {
+ 'url': 'http://embed.kwikmotion.com/embed/RufMcytHDolTH1MuKHY9Fw',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ webpage = self._download_webpage(url, video_id)
+
+ title = unescapeHTML(self._og_search_title(webpage))
+ hidden_inputs = self._hidden_inputs(webpage)
+
+ formats = self._extract_wowza_formats(
+ hidden_inputs.get('HiddenmyhHlsLink') or hidden_inputs['HiddenmyDashLink'], video_id, skip_protocols=['f4m', 'smil'])
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': hidden_inputs.get('HiddenThumbnail') or self._og_search_thumbnail(webpage),
+ 'formats': formats,
+ }
diff --git a/youtube_dlc/extractor/voicerepublic.py b/youtube_dlc/extractor/voicerepublic.py
new file mode 100644
index 000000000..a52e40afa
--- /dev/null
+++ b/youtube_dlc/extractor/voicerepublic.py
@@ -0,0 +1,62 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ determine_ext,
+ int_or_none,
+ urljoin,
+)
+
+
+class VoiceRepublicIE(InfoExtractor):
+ _VALID_URL = r'https?://voicerepublic\.com/(?:talks|embed)/(?P<id>[0-9a-z-]+)'
+ _TESTS = [{
+ 'url': 'http://voicerepublic.com/talks/watching-the-watchers-building-a-sousveillance-state',
+ 'md5': 'b9174d651323f17783000876347116e3',
+ 'info_dict': {
+ 'id': '2296',
+ 'display_id': 'watching-the-watchers-building-a-sousveillance-state',
+ 'ext': 'm4a',
+ 'title': 'Watching the Watchers: Building a Sousveillance State',
+ 'description': 'Secret surveillance programs have metadata too. The people and companies that operate secret surveillance programs can be surveilled.',
+ 'duration': 1556,
+ 'view_count': int,
+ }
+ }, {
+ 'url': 'http://voicerepublic.com/embed/watching-the-watchers-building-a-sousveillance-state',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ display_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, display_id)
+
+ if '>Queued for processing, please stand by...<' in webpage:
+ raise ExtractorError(
+ 'Audio is still queued for processing', expected=True)
+
+ talk = self._parse_json(self._search_regex(
+ r'initialSnapshot\s*=\s*({.+?});',
+ webpage, 'talk'), display_id)['talk']
+ title = talk['title']
+ formats = [{
+ 'url': urljoin(url, talk_url),
+ 'format_id': format_id,
+ 'ext': determine_ext(talk_url) or format_id,
+ 'vcodec': 'none',
+ } for format_id, talk_url in talk['media_links'].items()]
+ self._sort_formats(formats)
+
+ return {
+ 'id': compat_str(talk.get('id') or display_id),
+ 'display_id': display_id,
+ 'title': title,
+ 'description': talk.get('teaser'),
+ 'thumbnail': talk.get('image_url'),
+ 'duration': int_or_none(talk.get('archived_duration')),
+ 'view_count': int_or_none(talk.get('play_count')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/voot.py b/youtube_dlc/extractor/voot.py
index 751b21ee5..751b21ee5 100644
--- a/youtube_dl/extractor/voot.py
+++ b/youtube_dlc/extractor/voot.py
diff --git a/youtube_dl/extractor/voxmedia.py b/youtube_dlc/extractor/voxmedia.py
index b318e15d4..b318e15d4 100644
--- a/youtube_dl/extractor/voxmedia.py
+++ b/youtube_dlc/extractor/voxmedia.py
diff --git a/youtube_dl/extractor/vrak.py b/youtube_dlc/extractor/vrak.py
index daa247cce..daa247cce 100644
--- a/youtube_dl/extractor/vrak.py
+++ b/youtube_dlc/extractor/vrak.py
diff --git a/youtube_dl/extractor/vrt.py b/youtube_dlc/extractor/vrt.py
index 422025267..422025267 100644
--- a/youtube_dl/extractor/vrt.py
+++ b/youtube_dlc/extractor/vrt.py
diff --git a/youtube_dl/extractor/vrv.py b/youtube_dlc/extractor/vrv.py
index 6e51469b0..6e51469b0 100644
--- a/youtube_dl/extractor/vrv.py
+++ b/youtube_dlc/extractor/vrv.py
diff --git a/youtube_dl/extractor/vshare.py b/youtube_dlc/extractor/vshare.py
index c631ac1fa..c631ac1fa 100644
--- a/youtube_dl/extractor/vshare.py
+++ b/youtube_dlc/extractor/vshare.py
diff --git a/youtube_dl/extractor/vube.py b/youtube_dlc/extractor/vube.py
index 8ce3a6b81..8ce3a6b81 100644
--- a/youtube_dl/extractor/vube.py
+++ b/youtube_dlc/extractor/vube.py
diff --git a/youtube_dl/extractor/vuclip.py b/youtube_dlc/extractor/vuclip.py
index 55e087bdb..55e087bdb 100644
--- a/youtube_dl/extractor/vuclip.py
+++ b/youtube_dlc/extractor/vuclip.py
diff --git a/youtube_dl/extractor/vvvvid.py b/youtube_dlc/extractor/vvvvid.py
index 6906cd2ab..6906cd2ab 100644
--- a/youtube_dl/extractor/vvvvid.py
+++ b/youtube_dlc/extractor/vvvvid.py
diff --git a/youtube_dl/extractor/vyborymos.py b/youtube_dlc/extractor/vyborymos.py
index 9e703c4b6..9e703c4b6 100644
--- a/youtube_dl/extractor/vyborymos.py
+++ b/youtube_dlc/extractor/vyborymos.py
diff --git a/youtube_dlc/extractor/vzaar.py b/youtube_dlc/extractor/vzaar.py
new file mode 100644
index 000000000..b7d02fca3
--- /dev/null
+++ b/youtube_dlc/extractor/vzaar.py
@@ -0,0 +1,112 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ int_or_none,
+ float_or_none,
+ unified_timestamp,
+ url_or_none,
+)
+
+
+class VzaarIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
+ _TESTS = [{
+ # HTTP and HLS
+ 'url': 'https://vzaar.com/videos/1152805',
+ 'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
+ 'info_dict': {
+ 'id': '1152805',
+ 'ext': 'mp4',
+ 'title': 'sample video (public)',
+ },
+ }, {
+ 'url': 'https://view.vzaar.com/27272/player',
+ 'md5': '3b50012ac9bbce7f445550d54e0508f2',
+ 'info_dict': {
+ 'id': '27272',
+ 'ext': 'mp3',
+ 'title': 'MP3',
+ },
+ }, {
+ # hlsAes = true
+ 'url': 'https://view.vzaar.com/11379930/player',
+ 'info_dict': {
+ 'id': '11379930',
+ 'ext': 'mp4',
+ 'title': 'Videoaula',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ # with null videoTitle
+ 'url': 'https://view.vzaar.com/20313539/download',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
+ webpage)
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ video_data = self._download_json(
+ 'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
+
+ title = video_data.get('videoTitle') or video_id
+
+ formats = []
+
+ source_url = url_or_none(video_data.get('sourceUrl'))
+ if source_url:
+ f = {
+ 'url': source_url,
+ 'format_id': 'http',
+ 'preference': 1,
+ }
+ if 'audio' in source_url:
+ f.update({
+ 'vcodec': 'none',
+ 'ext': 'mp3',
+ })
+ else:
+ f.update({
+ 'width': int_or_none(video_data.get('width')),
+ 'height': int_or_none(video_data.get('height')),
+ 'ext': 'mp4',
+ 'fps': float_or_none(video_data.get('fps')),
+ })
+ formats.append(f)
+
+ video_guid = video_data.get('guid')
+ usp = video_data.get('usp')
+ if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict):
+ hls_aes = video_data.get('hlsAes')
+ qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items())
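+ # Two-pass template: video_guid/video_id are substituted now, while
+ # the escaped %%s slots keep a %s each for the subdomain ('fable' or
+ # 'goose') and the path suffix filled in below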
+ url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' % ('aes' if hls_aes else '', video_guid, video_id)
+ m3u8_formats = self._extract_m3u8_formats(
+ url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False)
+ if hls_aes:
+ for f in m3u8_formats:
+ f['_decryption_key_url'] = url_templ % ('goose', '') + qs
+ formats.extend(m3u8_formats)
+
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': self._proto_relative_url(video_data.get('poster')),
+ 'duration': float_or_none(video_data.get('videoDuration')),
+ 'timestamp': unified_timestamp(video_data.get('ts')),
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/wakanim.py b/youtube_dlc/extractor/wakanim.py
index f9a2395d9..f9a2395d9 100644
--- a/youtube_dl/extractor/wakanim.py
+++ b/youtube_dlc/extractor/wakanim.py
diff --git a/youtube_dl/extractor/walla.py b/youtube_dlc/extractor/walla.py
index cbb548672..cbb548672 100644
--- a/youtube_dl/extractor/walla.py
+++ b/youtube_dlc/extractor/walla.py
diff --git a/youtube_dl/extractor/washingtonpost.py b/youtube_dlc/extractor/washingtonpost.py
index 625d0a1cc..625d0a1cc 100644
--- a/youtube_dl/extractor/washingtonpost.py
+++ b/youtube_dlc/extractor/washingtonpost.py
diff --git a/youtube_dl/extractor/wat.py b/youtube_dlc/extractor/wat.py
index 8ef3e0906..8ef3e0906 100644
--- a/youtube_dl/extractor/wat.py
+++ b/youtube_dlc/extractor/wat.py
diff --git a/youtube_dl/extractor/watchbox.py b/youtube_dlc/extractor/watchbox.py
index 5a4e46e73..5a4e46e73 100644
--- a/youtube_dl/extractor/watchbox.py
+++ b/youtube_dlc/extractor/watchbox.py
diff --git a/youtube_dl/extractor/watchindianporn.py b/youtube_dlc/extractor/watchindianporn.py
index fadc539ee..fadc539ee 100644
--- a/youtube_dl/extractor/watchindianporn.py
+++ b/youtube_dlc/extractor/watchindianporn.py
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dlc/extractor/wdr.py
index 44d4a13ca..44d4a13ca 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dlc/extractor/wdr.py
diff --git a/youtube_dl/extractor/webcaster.py b/youtube_dlc/extractor/webcaster.py
index e4b65f54f..e4b65f54f 100644
--- a/youtube_dl/extractor/webcaster.py
+++ b/youtube_dlc/extractor/webcaster.py
diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dlc/extractor/webofstories.py
index f2b8d19b4..f2b8d19b4 100644
--- a/youtube_dl/extractor/webofstories.py
+++ b/youtube_dlc/extractor/webofstories.py
diff --git a/youtube_dl/extractor/weibo.py b/youtube_dlc/extractor/weibo.py
index 621df5b54..621df5b54 100644
--- a/youtube_dl/extractor/weibo.py
+++ b/youtube_dlc/extractor/weibo.py
diff --git a/youtube_dl/extractor/weiqitv.py b/youtube_dlc/extractor/weiqitv.py
index 7e0befd39..7e0befd39 100644
--- a/youtube_dl/extractor/weiqitv.py
+++ b/youtube_dlc/extractor/weiqitv.py
diff --git a/youtube_dlc/extractor/wistia.py b/youtube_dlc/extractor/wistia.py
new file mode 100644
index 000000000..77febd2eb
--- /dev/null
+++ b/youtube_dlc/extractor/wistia.py
@@ -0,0 +1,162 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ unescapeHTML,
+)
+
+
+class WistiaIE(InfoExtractor):
+ _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
+ _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'
+
+ _TESTS = [{
+ 'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
+ 'md5': 'cafeb56ec0c53c18c97405eecb3133df',
+ 'info_dict': {
+ 'id': 'sh7fpupwlt',
+ 'ext': 'mov',
+ 'title': 'Being Resourceful',
+ 'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
+ 'upload_date': '20131204',
+ 'timestamp': 1386185018,
+ 'duration': 117,
+ },
+ }, {
+ 'url': 'wistia:sh7fpupwlt',
+ 'only_matching': True,
+ }, {
+ # with hls video
+ 'url': 'wistia:807fafadvk',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
+ 'only_matching': True,
+ }]
+
+ # https://wistia.com/support/embed-and-share/video-on-your-website
+ @staticmethod
+ def _extract_url(webpage):
+ urls = WistiaIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @staticmethod
+ def _extract_urls(webpage):
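+ # Three embed styles are recognised: iframe/script src and meta
+ # content URLs, async embed <div> class names, and legacy
+ # data-wistia-id / Wistia.embed() / id="wistia_..." markers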
+ urls = []
+ for match in re.finditer(
+ r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
+ urls.append(unescapeHTML(match.group('url')))
+ for match in re.finditer(
+ r'''(?sx)
+ <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
+ ''', webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
+ urls.append('wistia:%s' % match.group('id'))
+ return urls
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ data_json = self._download_json(
+ self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
+ # Some videos require this.
+ headers={
+ 'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
+ })
+
+ if data_json.get('error'):
+ raise ExtractorError(
+ 'Error while getting the playlist', expected=True)
+
+ data = data_json['media']
+ title = data['name']
+
+ formats = []
+ thumbnails = []
+ for a in data['assets']:
+ aurl = a.get('url')
+ if not aurl:
+ continue
+ astatus = a.get('status')
+ atype = a.get('type')
+ if (astatus is not None and astatus != 2) or atype in ('preview', 'storyboard'):
+ continue
+ elif atype in ('still', 'still_image'):
+ thumbnails.append({
+ 'url': aurl,
+ 'width': int_or_none(a.get('width')),
+ 'height': int_or_none(a.get('height')),
+ 'filesize': int_or_none(a.get('size')),
+ })
+ else:
+ aext = a.get('ext')
+ display_name = a.get('display_name')
+ format_id = atype
+ if atype and atype.endswith('_video') and display_name:
+ format_id = '%s-%s' % (atype[:-6], display_name)
+ f = {
+ 'format_id': format_id,
+ 'url': aurl,
+ 'tbr': int_or_none(a.get('bitrate')) or None,
+ 'preference': 1 if atype == 'original' else None,
+ }
+ if display_name == 'Audio':
+ f.update({
+ 'vcodec': 'none',
+ })
+ else:
+ f.update({
+ 'width': int_or_none(a.get('width')),
+ 'height': int_or_none(a.get('height')),
+ 'vcodec': a.get('codec'),
+ })
+ if a.get('container') == 'm3u8' or aext == 'm3u8':
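+ # HLS assets double as progressive downloads: rewriting the .bin
+ # segment URL to .ts yields a plain MPEG-TS file, so both
+ # variants are emitted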
+ ts_f = f.copy()
+ ts_f.update({
+ 'ext': 'ts',
+ 'format_id': f['format_id'].replace('hls-', 'ts-'),
+ 'url': f['url'].replace('.bin', '.ts'),
+ })
+ formats.append(ts_f)
+ f.update({
+ 'ext': 'mp4',
+ 'protocol': 'm3u8_native',
+ })
+ else:
+ f.update({
+ 'container': a.get('container'),
+ 'ext': aext,
+ 'filesize': int_or_none(a.get('size')),
+ })
+ formats.append(f)
+
+ self._sort_formats(formats)
+
+ subtitles = {}
+ for caption in data.get('captions', []):
+ language = caption.get('language')
+ if not language:
+ continue
+ subtitles[language] = [{
+ 'url': self._EMBED_BASE_URL + 'captions/' + video_id + '.vtt?language=' + language,
+ }]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': data.get('seoDescription'),
+ 'formats': formats,
+ 'thumbnails': thumbnails,
+ 'duration': float_or_none(data.get('duration')),
+ 'timestamp': int_or_none(data.get('createdAt')),
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/extractor/worldstarhiphop.py b/youtube_dlc/extractor/worldstarhiphop.py
index 82587b4ce..82587b4ce 100644
--- a/youtube_dl/extractor/worldstarhiphop.py
+++ b/youtube_dlc/extractor/worldstarhiphop.py
diff --git a/youtube_dl/extractor/wsj.py b/youtube_dlc/extractor/wsj.py
index 67236f377..67236f377 100644
--- a/youtube_dl/extractor/wsj.py
+++ b/youtube_dlc/extractor/wsj.py
diff --git a/youtube_dl/extractor/wwe.py b/youtube_dlc/extractor/wwe.py
index bebc77bb5..bebc77bb5 100644
--- a/youtube_dl/extractor/wwe.py
+++ b/youtube_dlc/extractor/wwe.py
diff --git a/youtube_dl/extractor/xbef.py b/youtube_dlc/extractor/xbef.py
index 4c41e98b2..4c41e98b2 100644
--- a/youtube_dl/extractor/xbef.py
+++ b/youtube_dlc/extractor/xbef.py
diff --git a/youtube_dl/extractor/xboxclips.py b/youtube_dlc/extractor/xboxclips.py
index d9c277bc3..d9c277bc3 100644
--- a/youtube_dl/extractor/xboxclips.py
+++ b/youtube_dlc/extractor/xboxclips.py
diff --git a/youtube_dlc/extractor/xfileshare.py b/youtube_dlc/extractor/xfileshare.py
new file mode 100644
index 000000000..48ef07ed1
--- /dev/null
+++ b/youtube_dlc/extractor/xfileshare.py
@@ -0,0 +1,193 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_chr
+from ..utils import (
+ decode_packed_codes,
+ determine_ext,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ urlencode_postdata,
+)
+
+
+# based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58
+def aa_decode(aa_code):
+ symbol_table = [
+ ('7', '((゚ー゚) + (o^_^o))'),
+ ('6', '((o^_^o) +(o^_^o))'),
+ ('5', '((゚ー゚) + (゚Θ゚))'),
+ ('2', '((o^_^o) - (゚Θ゚))'),
+ ('4', '(゚ー゚)'),
+ ('3', '(o^_^o)'),
+ ('1', '(゚Θ゚)'),
+ ('0', '(c^_^o)'),
+ ]
+ delim = '(゚Д゚)[゚ε゚]+'
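+ # Each delimiter-separated chunk reduces to a number once the kaomoji
+ # expressions above are substituted: a bare digit string is an octal
+ # char code, a 'u' prefix marks a hexadecimal one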
+ ret = ''
+ for aa_char in aa_code.split(delim):
+ for val, pat in symbol_table:
+ aa_char = aa_char.replace(pat, val)
+ aa_char = aa_char.replace('+ ', '')
+ m = re.match(r'^\d+', aa_char)
+ if m:
+ ret += compat_chr(int(m.group(0), 8))
+ else:
+ m = re.match(r'^u([\da-f]+)', aa_char)
+ if m:
+ ret += compat_chr(int(m.group(1), 16))
+ return ret
+
+
+class XFileShareIE(InfoExtractor):
+ _SITES = (
+ (r'clipwatching\.com', 'ClipWatching'),
+ (r'gounlimited\.to', 'GoUnlimited'),
+ (r'govid\.me', 'GoVid'),
+ (r'holavid\.com', 'HolaVid'),
+ (r'streamty\.com', 'Streamty'),
+ (r'thevideobee\.to', 'TheVideoBee'),
+ (r'uqload\.com', 'Uqload'),
+ (r'vidbom\.com', 'VidBom'),
+ (r'vidlo\.us', 'vidlo'),
+ (r'vidlocker\.xyz', 'VidLocker'),
+ (r'vidshare\.tv', 'VidShare'),
+ (r'vup\.to', 'VUp'),
+ (r'xvideosharing\.com', 'XVideoSharing'),
+ )
+
+ IE_DESC = 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES))[1])
+ _VALID_URL = (r'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
+ % '|'.join(site for site in list(zip(*_SITES))[0]))
+
+ _FILE_NOT_FOUND_REGEXES = (
+ r'>(?:404 - )?File Not Found<',
+ r'>The file was removed by administrator<',
+ )
+
+ _TESTS = [{
+ 'url': 'http://xvideosharing.com/fq65f94nd2ve',
+ 'md5': '4181f63957e8fe90ac836fa58dc3c8a6',
+ 'info_dict': {
+ 'id': 'fq65f94nd2ve',
+ 'ext': 'mp4',
+ 'title': 'sample',
+ 'thumbnail': r're:http://.*\.jpg',
+ },
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:%s)/embed-[0-9a-zA-Z]+.*?)\1'
+ % '|'.join(site for site in list(zip(*XFileShareIE._SITES))[0]),
+ webpage)]
+
+ def _real_extract(self, url):
+ host, video_id = re.match(self._VALID_URL, url).groups()
+
+ url = 'https://%s/' % host + ('embed-%s.html' % video_id if host in ('govid.me', 'vidlo.us') else video_id)
+ webpage = self._download_webpage(url, video_id)
+
+ if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES):
+ raise ExtractorError('Video %s does not exist' % video_id, expected=True)
+
+ fields = self._hidden_inputs(webpage)
+
+ if fields.get('op') == 'download1':
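+ # The first page is a download gate: honour the visible countdown,
+ # then re-post the hidden form fields to reach the real video page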
+ countdown = int_or_none(self._search_regex(
+ r'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
+ webpage, 'countdown', default=None))
+ if countdown:
+ self._sleep(countdown, video_id)
+
+ webpage = self._download_webpage(
+ url, video_id, 'Downloading video page',
+ data=urlencode_postdata(fields), headers={
+ 'Referer': url,
+ 'Content-type': 'application/x-www-form-urlencoded',
+ })
+
+ title = (self._search_regex(
+ (r'style="z-index: [0-9]+;">([^<]+)</span>',
+ r'<td nowrap>([^<]+)</td>',
+ r'h4-fine[^>]*>([^<]+)<',
+ r'>Watch (.+)[ <]',
+ r'<h2 class="video-page-head">([^<]+)</h2>',
+ r'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<', # streamin.to
+ r'title\s*:\s*"([^"]+)"'), # govid.me
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None) or video_id).strip()
+
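+ # Deobfuscate in place: packed p,a,c,k,e,d payloads and AAEncoded
+ # blobs are decoded and spliced back into the page so the jwplayer
+ # and source regexes below can match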
+ for regex, func in (
+ (r'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes),
+ (r'(゚.+)', aa_decode)):
+ obf_code = self._search_regex(regex, webpage, 'obfuscated code', default=None)
+ if obf_code:
+ webpage = webpage.replace(obf_code, func(obf_code))
+
+ formats = []
+
+ jwplayer_data = self._search_regex(
+ [
+ r'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);',
+ r'jwplayer\("[^"]+"\)\.setup\(({.+?})\);',
+ ], webpage,
+ 'jwplayer data', default=None)
+ if jwplayer_data:
+ jwplayer_data = self._parse_json(
+ jwplayer_data.replace(r"\'", "'"), video_id, js_to_json)
+ if jwplayer_data:
+ formats = self._parse_jwplayer_data(
+ jwplayer_data, video_id, False,
+ m3u8_id='hls', mpd_id='dash')['formats']
+
+ if not formats:
+ urls = []
+ for regex in (
+ r'(?:file|src)\s*:\s*(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1',
+ r'file_link\s*=\s*(["\'])(?P<url>http(?:(?!\1).)+)\1',
+ r'addVariable\((\\?["\'])file\1\s*,\s*(\\?["\'])(?P<url>http(?:(?!\2).)+)\2\)',
+ r'<embed[^>]+src=(["\'])(?P<url>http(?:(?!\1).)+\.(?:m3u8|mp4|flv)(?:(?!\1).)*)\1'):
+ for mobj in re.finditer(regex, webpage):
+ video_url = mobj.group('url')
+ if video_url not in urls:
+ urls.append(video_url)
+
+ sources = self._search_regex(
+ r'sources\s*:\s*(\[(?!{)[^\]]+\])', webpage, 'sources', default=None)
+ if sources:
+ urls.extend(self._parse_json(sources, video_id))
+
+ formats = []
+ for video_url in urls:
+ if determine_ext(video_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ video_url, video_id, 'mp4',
+ entry_protocol='m3u8_native', m3u8_id='hls',
+ fatal=False))
+ else:
+ formats.append({
+ 'url': video_url,
+ 'format_id': 'sd',
+ })
+ self._sort_formats(formats)
+
+ thumbnail = self._search_regex(
+ [
+ r'<video[^>]+poster="([^"]+)"',
+ r'(?:image|poster)\s*:\s*["\'](http[^"\']+)["\'],',
+ ], webpage, 'thumbnail', default=None)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dlc/extractor/xhamster.py b/youtube_dlc/extractor/xhamster.py
new file mode 100644
index 000000000..902a3ed33
--- /dev/null
+++ b/youtube_dlc/extractor/xhamster.py
@@ -0,0 +1,393 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ clean_html,
+ determine_ext,
+ dict_get,
+ extract_attributes,
+ ExtractorError,
+ int_or_none,
+ parse_duration,
+ try_get,
+ unified_strdate,
+ url_or_none,
+)
+
+
+class XHamsterIE(InfoExtractor):
+ _DOMAINS = r'(?:xhamster\.(?:com|one|desi)|xhms\.pro|xhamster\d+\.com)'
+ _VALID_URL = r'''(?x)
+ https?://
+ (?:.+?\.)?%s/
+ (?:
+ movies/(?P<id>[\dA-Za-z]+)/(?P<display_id>[^/]*)\.html|
+ videos/(?P<display_id_2>[^/]*)-(?P<id_2>[\dA-Za-z]+)
+ )
+ ''' % _DOMAINS
+ _TESTS = [{
+ 'url': 'https://xhamster.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'md5': '98b4687efb1ffd331c4197854dc09e8f',
+ 'info_dict': {
+ 'id': '1509445',
+ 'display_id': 'femaleagent-shy-beauty-takes-the-bait',
+ 'ext': 'mp4',
+ 'title': 'FemaleAgent Shy beauty takes the bait',
+ 'timestamp': 1350194821,
+ 'upload_date': '20121014',
+ 'uploader': 'Ruseful2011',
+ 'duration': 893,
+ 'age_limit': 18,
+ },
+ }, {
+ 'url': 'https://xhamster.com/videos/britney-spears-sexy-booty-2221348?hd=',
+ 'info_dict': {
+ 'id': '2221348',
+ 'display_id': 'britney-spears-sexy-booty',
+ 'ext': 'mp4',
+ 'title': 'Britney Spears Sexy Booty',
+ 'timestamp': 1379123460,
+ 'upload_date': '20130914',
+ 'uploader': 'jojo747400',
+ 'duration': 200,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # empty seo, unavailable via new URL schema
+ 'url': 'http://xhamster.com/movies/5667973/.html',
+ 'info_dict': {
+ 'id': '5667973',
+ 'ext': 'mp4',
+ 'title': '....',
+ 'timestamp': 1454948101,
+ 'upload_date': '20160208',
+ 'uploader': 'parejafree',
+ 'duration': 72,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # mobile site
+ 'url': 'https://m.xhamster.com/videos/cute-teen-jacqueline-solo-masturbation-8559111',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.com/movies/2272726/amber_slayed_by_the_knight.html',
+ 'only_matching': True,
+ }, {
+ # This video is visible for marcoalfa123456's friends only
+ 'url': 'https://it.xhamster.com/movies/7263980/la_mia_vicina.html',
+ 'only_matching': True,
+ }, {
+ # new URL schema
+ 'url': 'https://pt.xhamster.com/videos/euro-pedal-pumping-7937821',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.one/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster.desi/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster2.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster11.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://xhamster26.com/videos/femaleagent-shy-beauty-takes-the-bait-1509445',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xhamster.com/movies/1509445/femaleagent_shy_beauty_takes_the_bait.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://xhamster.com/movies/2221348/britney_spears_sexy_booty.html?hd',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://de.xhamster.com/videos/skinny-girl-fucks-herself-hard-in-the-forest-xhnBJZx',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('id_2')
+ display_id = mobj.group('display_id') or mobj.group('display_id_2')
+
+ desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url)
+ webpage, urlh = self._download_webpage_handle(desktop_url, video_id)
+
+ error = self._html_search_regex(
+ r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>',
+ webpage, 'error', default=None)
+ if error:
+ raise ExtractorError(error, expected=True)
+
+ age_limit = self._rta_search(webpage)
+
+ def get_height(s):
+ return int_or_none(self._search_regex(
+ r'^(\d+)[pP]', s, 'height', default=None))
+
+ initials = self._parse_json(
+ self._search_regex(
+ r'window\.initials\s*=\s*({.+?})\s*;', webpage, 'initials',
+ default='{}'),
+ video_id, fatal=False)
+ if initials:
+ video = initials['videoModel']
+ title = video['title']
+ formats = []
+ for format_id, formats_dict in video['sources'].items():
+ if not isinstance(formats_dict, dict):
+ continue
+ for quality, format_item in formats_dict.items():
+ if format_id == 'download':
+ # Download link takes some time to be generated,
+ # skipping for now
+ continue
+ if not isinstance(format_item, dict):
+ continue
+ format_url = format_item.get('link')
+ filesize = int_or_none(
+ format_item.get('size'), invscale=1000000)
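+ # 'size' appears to be megabytes; invscale scales it to bytes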
+ else:
+ format_url = format_item
+ filesize = None
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ formats.append({
+ 'format_id': '%s-%s' % (format_id, quality),
+ 'url': format_url,
+ 'ext': determine_ext(format_url, 'mp4'),
+ 'height': get_height(quality),
+ 'filesize': filesize,
+ 'http_headers': {
+ 'Referer': urlh.geturl(),
+ },
+ })
+ self._sort_formats(formats)
+
+ categories_list = video.get('categories')
+ if isinstance(categories_list, list):
+ categories = []
+ for c in categories_list:
+ if not isinstance(c, dict):
+ continue
+ c_name = c.get('name')
+ if isinstance(c_name, compat_str):
+ categories.append(c_name)
+ else:
+ categories = None
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': video.get('description'),
+ 'timestamp': int_or_none(video.get('created')),
+ 'uploader': try_get(
+ video, lambda x: x['author']['name'], compat_str),
+ 'thumbnail': video.get('thumbURL'),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('views')),
+ 'like_count': int_or_none(try_get(
+ video, lambda x: x['rating']['likes'], int)),
+ 'dislike_count': int_or_none(try_get(
+ video, lambda x: x['rating']['dislikes'], int)),
+ # 'views' was duplicated here from view_count above, which looked
+ # like a copy-paste slip; assuming the video model exposes a
+ # 'comments' counter instead
+ 'comment_count': int_or_none(video.get('comments')),
+ 'age_limit': age_limit,
+ 'categories': categories,
+ 'formats': formats,
+ }
+
+ # Old layout fallback
+
+ title = self._html_search_regex(
+ [r'<h1[^>]*>([^<]+)</h1>',
+ r'<meta[^>]+itemprop=".*?caption.*?"[^>]+content="(.+?)"',
+ r'<title[^>]*>(.+?)(?:,\s*[^,]*?\s*Porn\s*[^,]*?:\s*xHamster[^<]*| - xHamster\.com)</title>'],
+ webpage, 'title')
+
+ formats = []
+ format_urls = set()
+
+ sources = self._parse_json(
+ self._search_regex(
+ r'sources\s*:\s*({.+?})\s*,?\s*\n', webpage, 'sources',
+ default='{}'),
+ video_id, fatal=False)
+ # _parse_json(..., fatal=False) can return None on malformed JSON
+ for format_id, format_url in (sources or {}).items():
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ if format_url in format_urls:
+ continue
+ format_urls.add(format_url)
+ formats.append({
+ 'format_id': format_id,
+ 'url': format_url,
+ 'height': get_height(format_id),
+ })
+
+ video_url = self._search_regex(
+ [r'''file\s*:\s*(?P<q>["'])(?P<mp4>.+?)(?P=q)''',
+ r'''<a\s+href=(?P<q>["'])(?P<mp4>.+?)(?P=q)\s+class=["']mp4Thumb''',
+ r'''<video[^>]+file=(?P<q>["'])(?P<mp4>.+?)(?P=q)[^>]*>'''],
+ webpage, 'video url', group='mp4', default=None)
+ if video_url and video_url not in format_urls:
+ formats.append({
+ 'url': video_url,
+ })
+
+ self._sort_formats(formats)
+
+ # Only a few videos have a description
+ mobj = re.search(r'<span>Description: </span>([^<]+)', webpage)
+ description = mobj.group(1) if mobj else None
+
+ upload_date = unified_strdate(self._search_regex(
+ r'hint=["\'](\d{4}-\d{2}-\d{2}) \d{2}:\d{2}:\d{2} [A-Z]{3,4}',
+ webpage, 'upload date', fatal=False))
+
+ uploader = self._html_search_regex(
+ r'<span[^>]+itemprop=["\']author[^>]+><a[^>]+><span[^>]+>([^<]+)',
+ webpage, 'uploader', default='anonymous')
+
+ thumbnail = self._search_regex(
+ [r'''["']thumbUrl["']\s*:\s*(?P<q>["'])(?P<thumbnail>.+?)(?P=q)''',
+ r'''<video[^>]+"poster"=(?P<q>["'])(?P<thumbnail>.+?)(?P=q)[^>]*>'''],
+ webpage, 'thumbnail', fatal=False, group='thumbnail')
+
+ duration = parse_duration(self._search_regex(
+ [r'<[^<]+\bitemprop=["\']duration["\'][^<]+\bcontent=["\'](.+?)["\']',
+ r'Runtime:\s*</span>\s*([\d:]+)'], webpage,
+ 'duration', fatal=False))
+
+ view_count = int_or_none(self._search_regex(
+ r'content=["\']User(?:View|Play)s:(\d+)',
+ webpage, 'view count', fatal=False))
+
+ mobj = re.search(r'hint=[\'"](?P<likecount>\d+) Likes / (?P<dislikecount>\d+) Dislikes', webpage)
+ (like_count, dislike_count) = (mobj.group('likecount'), mobj.group('dislikecount')) if mobj else (None, None)
+
+ mobj = re.search(r'</label>Comments \((?P<commentcount>\d+)\)</div>', webpage)
+ comment_count = mobj.group('commentcount') if mobj else 0
+
+ categories_html = self._search_regex(
+ r'(?s)<table.+?(<span>Categories:.+?)</table>', webpage,
+ 'categories', default=None)
+ categories = [clean_html(category) for category in re.findall(
+ r'<a[^>]+>(.+?)</a>', categories_html)] if categories_html else None
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'upload_date': upload_date,
+ 'uploader': uploader,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'like_count': int_or_none(like_count),
+ 'dislike_count': int_or_none(dislike_count),
+ 'comment_count': int_or_none(comment_count),
+ 'age_limit': age_limit,
+ 'categories': categories,
+ 'formats': formats,
+ }
+
+
+class XHamsterEmbedIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?%s/xembed\.php\?video=(?P<id>\d+)' % XHamsterIE._DOMAINS
+ _TEST = {
+ 'url': 'http://xhamster.com/xembed.php?video=3328539',
+ 'info_dict': {
+ 'id': '3328539',
+ 'ext': 'mp4',
+ 'title': 'Pen Masturbation',
+ 'timestamp': 1406581861,
+ 'upload_date': '20140728',
+ 'uploader': 'ManyakisArt',
+ 'duration': 5,
+ 'age_limit': 18,
+ }
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return [url for _, url in re.findall(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?xhamster\.com/xembed\.php\?video=\d+)\1',
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ video_url = self._search_regex(
+ r'href="(https?://xhamster\.com/(?:movies/{0}/[^"]*\.html|videos/[^/]*-{0})[^"]*)"'.format(video_id),
+ webpage, 'xhamster url', default=None)
+
+ if not video_url:
+ vars = self._parse_json(
+ self._search_regex(r'vars\s*:\s*({.+?})\s*,\s*\n', webpage, 'vars'),
+ video_id)
+ video_url = dict_get(vars, ('downloadLink', 'homepageLink', 'commentsLink', 'shareUrl'))
+
+ return self.url_result(video_url, 'XHamster')
+
+
+class XHamsterUserIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:.+?\.)?%s/users/(?P<id>[^/?#&]+)' % XHamsterIE._DOMAINS
+ _TESTS = [{
+ # Paginated user profile
+ 'url': 'https://xhamster.com/users/netvideogirls/videos',
+ 'info_dict': {
+ 'id': 'netvideogirls',
+ },
+ 'playlist_mincount': 267,
+ }, {
+ # Non-paginated user profile
+ 'url': 'https://xhamster.com/users/firatkaan/videos',
+ 'info_dict': {
+ 'id': 'firatkaan',
+ },
+ 'playlist_mincount': 1,
+ }]
+
+ def _entries(self, user_id):
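+ # Walk the paginated profile by following the data-page="next"
+ # anchor until it disappears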
+ next_page_url = 'https://xhamster.com/users/%s/videos/1' % user_id
+ for pagenum in itertools.count(1):
+ page = self._download_webpage(
+ next_page_url, user_id, 'Downloading page %s' % pagenum)
+ for video_tag in re.findall(
+ r'(<a[^>]+class=["\'].*?\bvideo-thumb__image-container[^>]+>)',
+ page):
+ video = extract_attributes(video_tag)
+ video_url = url_or_none(video.get('href'))
+ if not video_url or not XHamsterIE.suitable(video_url):
+ continue
+ video_id = XHamsterIE._match_id(video_url)
+ yield self.url_result(
+ video_url, ie=XHamsterIE.ie_key(), video_id=video_id)
+ mobj = re.search(r'<a[^>]+data-page=["\']next[^>]+>', page)
+ if not mobj:
+ break
+ next_page = extract_attributes(mobj.group(0))
+ next_page_url = url_or_none(next_page.get('href'))
+ if not next_page_url:
+ break
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+ return self.playlist_result(self._entries(user_id), user_id)
diff --git a/youtube_dl/extractor/xiami.py b/youtube_dlc/extractor/xiami.py
index 618da8382..618da8382 100644
--- a/youtube_dl/extractor/xiami.py
+++ b/youtube_dlc/extractor/xiami.py
diff --git a/youtube_dl/extractor/ximalaya.py b/youtube_dlc/extractor/ximalaya.py
index a912e54b8..a912e54b8 100644
--- a/youtube_dl/extractor/ximalaya.py
+++ b/youtube_dlc/extractor/ximalaya.py
diff --git a/youtube_dl/extractor/xminus.py b/youtube_dlc/extractor/xminus.py
index 36e5ead1e..36e5ead1e 100644
--- a/youtube_dl/extractor/xminus.py
+++ b/youtube_dlc/extractor/xminus.py
diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dlc/extractor/xnxx.py
index ac1ccc404..ac1ccc404 100644
--- a/youtube_dl/extractor/xnxx.py
+++ b/youtube_dlc/extractor/xnxx.py
diff --git a/youtube_dl/extractor/xstream.py b/youtube_dlc/extractor/xstream.py
index 76c91bd92..76c91bd92 100644
--- a/youtube_dl/extractor/xstream.py
+++ b/youtube_dlc/extractor/xstream.py
diff --git a/youtube_dlc/extractor/xtube.py b/youtube_dlc/extractor/xtube.py
new file mode 100644
index 000000000..01b253dcb
--- /dev/null
+++ b/youtube_dlc/extractor/xtube.py
@@ -0,0 +1,200 @@
+from __future__ import unicode_literals
+
+import itertools
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ js_to_json,
+ orderedSet,
+ parse_duration,
+ sanitized_Request,
+ str_to_int,
+)
+
+
+class XTubeIE(InfoExtractor):
+ _VALID_URL = r'''(?x)
+ (?:
+ xtube:|
+ https?://(?:www\.)?xtube\.com/(?:watch\.php\?.*\bv=|video-watch/(?:embedded/)?(?P<display_id>[^/]+)-)
+ )
+ (?P<id>[^/?&#]+)
+ '''
+
+ _TESTS = [{
+ # old URL schema
+ 'url': 'http://www.xtube.com/watch.php?v=kVTUy_G222_',
+ 'md5': '092fbdd3cbe292c920ef6fc6a8a9cdab',
+ 'info_dict': {
+ 'id': 'kVTUy_G222_',
+ 'ext': 'mp4',
+ 'title': 'strange erotica',
+ 'description': 'contains:an ET kind of thing',
+ 'uploader': 'greenshowers',
+ 'duration': 450,
+ 'view_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ }
+ }, {
+ # FLV videos with duplicated formats
+ 'url': 'http://www.xtube.com/video-watch/A-Super-Run-Part-1-YT-9299752',
+ 'md5': 'a406963eb349dd43692ec54631efd88b',
+ 'info_dict': {
+ 'id': '9299752',
+ 'display_id': 'A-Super-Run-Part-1-YT',
+ 'ext': 'flv',
+ 'title': 'A Super Run - Part 1 (YT)',
+ 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616',
+ 'uploader': 'tshirtguy59',
+ 'duration': 579,
+ 'view_count': int,
+ 'comment_count': int,
+ 'age_limit': 18,
+ },
+ }, {
+ # new URL schema
+ 'url': 'http://www.xtube.com/video-watch/strange-erotica-625837',
+ 'only_matching': True,
+ }, {
+ 'url': 'xtube:625837',
+ 'only_matching': True,
+ }, {
+ 'url': 'xtube:kVTUy_G222_',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.xtube.com/video-watch/embedded/milf-tara-and-teen-shared-and-cum-covered-extreme-bukkake-32203482?embedsize=big',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id')
+
+ if not display_id:
+ display_id = video_id
+
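+ # Short numeric IDs belong to the new video-watch URL schema;
+ # anything else (e.g. 'kVTUy_G222_') uses the legacy watch.php
+ # endpoint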
+ if video_id.isdigit() and len(video_id) < 11:
+ url_pattern = 'http://www.xtube.com/video-watch/-%s'
+ else:
+ url_pattern = 'http://www.xtube.com/watch.php?v=%s'
+
+ webpage = self._download_webpage(
+ url_pattern % video_id, display_id, headers={
+ 'Cookie': 'age_verified=1; cookiesAccepted=1',
+ })
+
+ # sources is initialised too so the isinstance() check below cannot
+ # hit an unbound name when playerConf is missing or malformed
+ title, thumbnail, duration, sources = [None] * 4
+
+ config = self._parse_json(self._search_regex(
+ r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config',
+ default='{}'), video_id, transform_source=js_to_json, fatal=False)
+ if config:
+ config = config.get('mainRoll')
+ if isinstance(config, dict):
+ title = config.get('title')
+ thumbnail = config.get('poster')
+ duration = int_or_none(config.get('duration'))
+ sources = config.get('sources') or config.get('format')
+
+ if not isinstance(sources, dict):
+ sources = self._parse_json(self._search_regex(
+ r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
+ webpage, 'sources', group='sources'), video_id,
+ transform_source=js_to_json)
+
+ formats = []
+ for format_id, format_url in sources.items():
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': int_or_none(format_id),
+ })
+ self._remove_duplicate_formats(formats)
+ self._sort_formats(formats)
+
+ if not title:
+ title = self._search_regex(
+ (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'),
+ webpage, 'title', group='title')
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:description', webpage, default=None) or self._search_regex(
+ r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False)
+ uploader = self._search_regex(
+ (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"',
+ r'<span[^>]+class="nickname"[^>]*>([^<]+)'),
+ webpage, 'uploader', fatal=False)
+ if not duration:
+ duration = parse_duration(self._search_regex(
+ r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>',
+ webpage, 'duration', fatal=False))
+ view_count = str_to_int(self._search_regex(
+ (r'["\']viewsCount["\'][^>]*>(\d+)\s+views',
+ r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'),
+ webpage, 'view count', fatal=False))
+ comment_count = str_to_int(self._html_search_regex(
+ r'>Comments? \(([\d,\.]+)\)<',
+ webpage, 'comment count', fatal=False))
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'duration': duration,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'age_limit': 18,
+ 'formats': formats,
+ }
+
+
+class XTubeUserIE(InfoExtractor):
+ IE_DESC = 'XTube user profile'
+ _VALID_URL = r'https?://(?:www\.)?xtube\.com/profile/(?P<id>[^/]+-\d+)'
+ _TEST = {
+ 'url': 'http://www.xtube.com/profile/greenshowers-4056496',
+ 'info_dict': {
+ 'id': 'greenshowers-4056496',
+ 'age_limit': 18,
+ },
+ 'playlist_mincount': 154,
+ }
+
+ def _real_extract(self, url):
+ user_id = self._match_id(url)
+
+ entries = []
+ for pagenum in itertools.count(1):
+ request = sanitized_Request(
+ 'http://www.xtube.com/profile/%s/videos/%d' % (user_id, pagenum),
+ headers={
+ 'Cookie': 'popunder=4',
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'Referer': url,
+ })
+
+ page = self._download_json(
+ request, user_id, 'Downloading videos JSON page %d' % pagenum)
+
+ html = page.get('html')
+ if not html:
+ break
+
+ for video_id in orderedSet([video_id for _, video_id in re.findall(
+ r'data-plid=(["\'])(.+?)\1', html)]):
+ entries.append(self.url_result('xtube:%s' % video_id, XTubeIE.ie_key()))
+
+ page_count = int_or_none(page.get('pageCount'))
+ if not page_count or pagenum == page_count:
+ break
+
+ playlist = self.playlist_result(entries, user_id)
+ playlist['age_limit'] = 18
+ return playlist
diff --git a/youtube_dl/extractor/xuite.py b/youtube_dlc/extractor/xuite.py
index 0276c0dbb..0276c0dbb 100644
--- a/youtube_dl/extractor/xuite.py
+++ b/youtube_dlc/extractor/xuite.py
diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dlc/extractor/xvideos.py
index 8fc64914c..8fc64914c 100644
--- a/youtube_dl/extractor/xvideos.py
+++ b/youtube_dlc/extractor/xvideos.py
diff --git a/youtube_dl/extractor/xxxymovies.py b/youtube_dlc/extractor/xxxymovies.py
index e34ebe3a6..e34ebe3a6 100644
--- a/youtube_dl/extractor/xxxymovies.py
+++ b/youtube_dlc/extractor/xxxymovies.py
diff --git a/youtube_dlc/extractor/yahoo.py b/youtube_dlc/extractor/yahoo.py
new file mode 100644
index 000000000..e4615376c
--- /dev/null
+++ b/youtube_dlc/extractor/yahoo.py
@@ -0,0 +1,569 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import hashlib
+import itertools
+import re
+
+from .common import InfoExtractor, SearchInfoExtractor
+from ..compat import (
+ compat_str,
+ compat_urllib_parse,
+)
+from ..utils import (
+ clean_html,
+ ExtractorError,
+ int_or_none,
+ mimetype2ext,
+ parse_iso8601,
+ smuggle_url,
+ try_get,
+ url_or_none,
+)
+
+from .brightcove import BrightcoveNewIE
+
+
+class YahooIE(InfoExtractor):
+ IE_DESC = 'Yahoo screen and movies'
+ _VALID_URL = r'(?P<url>https?://(?:(?P<country>[a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\.)?(?:[\da-zA-Z_-]+\.)?yahoo\.com/(?:[^/]+/)*(?P<id>[^?&#]*-[0-9]+(?:-[a-z]+)?)\.html)'
+ _TESTS = [{
+ 'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
+ 'info_dict': {
+ 'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
+ 'ext': 'mp4',
+ 'title': 'Julian Smith & Travis Legg Watch Julian Smith',
+ 'description': 'Julian and Travis watch Julian Smith',
+ 'duration': 6863,
+ 'timestamp': 1369812016,
+ 'upload_date': '20130529',
+ },
+ }, {
+ 'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
+ 'md5': '7993e572fac98e044588d0b5260f4352',
+ 'info_dict': {
+ 'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
+ 'ext': 'mp4',
+ 'title': "Yahoo Saves 'Community'",
+ 'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
+ 'duration': 170,
+ 'timestamp': 1406838636,
+ 'upload_date': '20140731',
+ },
+ }, {
+ 'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html',
+ 'md5': '71298482f7c64cbb7fa064e4553ff1c1',
+ 'info_dict': {
+ 'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
+ 'ext': 'webm',
+ 'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
+ 'description': 'md5:f66c890e1490f4910a9953c941dee944',
+ 'duration': 97,
+ 'timestamp': 1414489862,
+ 'upload_date': '20141028',
+ }
+ }, {
+ 'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
+ 'md5': '88e209b417f173d86186bef6e4d1f160',
+ 'info_dict': {
+ 'id': 'f885cf7f-43d4-3450-9fac-46ac30ece521',
+ 'ext': 'mp4',
+ 'title': 'China Moses Is Crazy About the Blues',
+ 'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
+ 'duration': 128,
+ 'timestamp': 1385722202,
+ 'upload_date': '20131129',
+ }
+ }, {
+ 'url': 'https://www.yahoo.com/movies/v/true-story-trailer-173000497.html',
+ 'md5': '2a9752f74cb898af5d1083ea9f661b58',
+ 'info_dict': {
+ 'id': '071c4013-ce30-3a93-a5b2-e0413cd4a9d1',
+ 'ext': 'mp4',
+ 'title': '\'True Story\' Trailer',
+ 'description': 'True Story',
+ 'duration': 150,
+ 'timestamp': 1418919206,
+ 'upload_date': '20141218',
+ },
+ }, {
+ 'url': 'https://gma.yahoo.com/pizza-delivery-man-surprised-huge-tip-college-kids-195200785.html',
+ 'only_matching': True,
+ }, {
+ 'note': 'NBC Sports embeds',
+ 'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
+ 'info_dict': {
+ 'id': '9CsDKds0kvHI',
+ 'ext': 'flv',
+ 'description': 'md5:df390f70a9ba7c95ff1daace988f0d8d',
+ 'title': 'Tyler Kalinoski hits buzzer-beater to lift Davidson',
+ 'upload_date': '20150313',
+ 'uploader': 'NBCU-SPORTS',
+ 'timestamp': 1426270238,
+ },
+ }, {
+ 'url': 'https://tw.news.yahoo.com/-100120367.html',
+ 'only_matching': True,
+ }, {
+ # Query result is embedded in webpage, but explicit request to video API fails with geo restriction
+ 'url': 'https://screen.yahoo.com/community/communitary-community-episode-1-ladders-154501237.html',
+ 'md5': '4fbafb9c9b6f07aa8f870629f6671b35',
+ 'info_dict': {
+ 'id': '1f32853c-a271-3eef-8cb6-f6d6872cb504',
+ 'ext': 'mp4',
+ 'title': 'Communitary - Community Episode 1: Ladders',
+ 'description': 'md5:8fc39608213295748e1e289807838c97',
+ 'duration': 1646,
+ 'timestamp': 1440436550,
+ 'upload_date': '20150824',
+ 'series': 'Communitary',
+ 'season_number': 6,
+ 'episode_number': 1,
+ },
+ }, {
+ # ytwnews://cavideo/
+ 'url': 'https://tw.video.yahoo.com/movie-tw/單車天使-中文版預-092316541.html',
+ 'info_dict': {
+ 'id': 'ba133ff2-0793-3510-b636-59dfe9ff6cff',
+ 'ext': 'mp4',
+ 'title': '單車天使 - 中文版預',
+ 'description': '中文版預',
+ 'timestamp': 1476696196,
+ 'upload_date': '20161017',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # Contains both a Yahoo hosted video and multiple Youtube embeds
+ 'url': 'https://www.yahoo.com/entertainment/gwen-stefani-reveals-the-pop-hit-she-passed-on-assigns-it-to-her-voice-contestant-instead-033045672.html',
+ 'info_dict': {
+ 'id': '46c5d95a-528f-3d03-b732-732fcadd51de',
+ 'title': 'Gwen Stefani reveals the pop hit she passed on, assigns it to her \'Voice\' contestant instead',
+ 'description': 'Gwen decided not to record this hit herself, but she decided it was the perfect fit for Kyndall Inskeep.',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': '966d4262-4fd1-3aaa-b45b-049ca6e38ba6',
+ 'ext': 'mp4',
+ 'title': 'Gwen Stefani reveals she turned down one of Sia\'s best songs',
+ 'description': 'On "The Voice" Tuesday, Gwen Stefani told Taylor Swift which Sia hit was almost hers.',
+ 'timestamp': 1572406500,
+ 'upload_date': '20191030',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '352CFDOQrKg',
+ 'ext': 'mp4',
+ 'title': 'Kyndal Inskeep "Performs the Hell Out of" Sia\'s "Elastic Heart" - The Voice Knockouts 2019',
+ 'description': 'md5:35b61e94c2ae214bc965ff4245f80d11',
+ 'uploader': 'The Voice',
+ 'uploader_id': 'NBCTheVoice',
+ 'upload_date': '20191029',
+ },
+ }],
+ 'params': {
+ 'playlistend': 2,
+ },
+ 'expected_warnings': ['HTTP Error 404'],
+ }, {
+ 'url': 'https://malaysia.news.yahoo.com/video/bystanders-help-ontario-policeman-bust-190932818.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://es-us.noticias.yahoo.com/es-la-puerta-irrompible-que-110539379.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.yahoo.com/entertainment/v/longtime-cbs-news-60-minutes-032036500-cbs.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ url, country, display_id = re.match(self._VALID_URL, url).groups()
+ if not country:
+ country = 'us'
+ else:
+ country = country.split('-')[0]
+ api_base = 'https://%s.yahoo.com/_td/api/resource/' % country
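+ # Illustration (values hypothetical): a 'uk.' subdomain yields country 'uk'
+ # and api_base https://uk.yahoo.com/_td/api/resource/; a compound prefix
+ # such as 'es-us' is trimmed to 'es'; with no prefix the default is 'us'.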
+
+ for i, uuid in enumerate(['url=' + url, 'ymedia-alias=' + display_id]):
+ content = self._download_json(
+ api_base + 'content;getDetailView=true;uuids=["%s"]' % uuid,
+ display_id, 'Downloading content JSON metadata', fatal=i == 1)
+ if content:
+ item = content['items'][0]
+ break
+
+ if item.get('type') != 'video':
+ entries = []
+
+ cover = item.get('cover') or {}
+ if cover.get('type') == 'yvideo':
+ cover_url = cover.get('url')
+ if cover_url:
+ entries.append(self.url_result(
+ cover_url, 'Yahoo', cover.get('uuid')))
+
+ for e in item.get('body', []):
+ if e.get('type') == 'videoIframe':
+ iframe_url = e.get('url')
+ if not iframe_url:
+ continue
+ entries.append(self.url_result(iframe_url))
+
+ return self.playlist_result(
+ entries, item.get('uuid'),
+ item.get('title'), item.get('summary'))
+
+ video_id = item['uuid']
+ video = self._download_json(
+ api_base + 'VideoService.videos;view=full;video_ids=["%s"]' % video_id,
+ video_id, 'Downloading video JSON metadata')[0]
+ title = video['title']
+
+ if country == 'malaysia':
+ country = 'my'
+
+ is_live = video.get('live_state') == 'live'
+ fmts = ('m3u8',) if is_live else ('webm', 'mp4')
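+ # Live streams only expose an HLS manifest, so a single m3u8 query suffices;
+ # VOD is queried twice (webm, then mp4) to collect both format families.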
+
+ urls = []
+ formats = []
+ subtitles = {}
+ for fmt in fmts:
+ media_obj = self._download_json(
+ 'https://video-api.yql.yahoo.com/v1/video/sapi/streams/' + video_id,
+ video_id, 'Downloading %s JSON metadata' % fmt,
+ headers=self.geo_verification_headers(), query={
+ 'format': fmt,
+ 'region': country.upper(),
+ })['query']['results']['mediaObj'][0]
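+ # Only the first mediaObj entry is used; the fields consumed below are
+ # status.msg, streams[] (host, path, format, bitrate, width, height,
+ # framerate) and closedcaptions[] (url, lang, content_type).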
+ msg = media_obj.get('status', {}).get('msg')
+
+ for s in media_obj.get('streams', []):
+ host = s.get('host')
+ path = s.get('path')
+ if not host or not path:
+ continue
+ s_url = host + path
+ if s.get('format') == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ s_url, video_id, 'mp4', m3u8_id='hls', fatal=False))
+ continue
+ tbr = int_or_none(s.get('bitrate'))
+ formats.append({
+ 'url': s_url,
+ 'format_id': fmt + ('-%d' % tbr if tbr else ''),
+ 'width': int_or_none(s.get('width')),
+ 'height': int_or_none(s.get('height')),
+ 'tbr': tbr,
+ 'fps': int_or_none(s.get('framerate')),
+ })
+
+ for cc in media_obj.get('closedcaptions', []):
+ cc_url = cc.get('url')
+ if not cc_url or cc_url in urls:
+ continue
+ urls.append(cc_url)
+ subtitles.setdefault(cc.get('lang') or 'en-US', []).append({
+ 'url': cc_url,
+ 'ext': mimetype2ext(cc.get('content_type')),
+ })
+
+ streaming_url = video.get('streaming_url')
+ if streaming_url and not is_live:
+ formats.extend(self._extract_m3u8_formats(
+ streaming_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False))
+
+ if not formats and msg == 'geo restricted':
+ self.raise_geo_restricted()
+
+ self._sort_formats(formats)
+
+ thumbnails = []
+ for thumb in video.get('thumbnails', []):
+ thumb_url = thumb.get('url')
+ if not thumb_url:
+ continue
+ thumbnails.append({
+ 'id': thumb.get('tag'),
+ 'url': thumb_url,
+ 'width': int_or_none(thumb.get('width')),
+ 'height': int_or_none(thumb.get('height')),
+ })
+
+ series_info = video.get('series_info') or {}
+
+ return {
+ 'id': video_id,
+ 'title': self._live_title(title) if is_live else title,
+ 'formats': formats,
+ 'display_id': display_id,
+ 'thumbnails': thumbnails,
+ 'description': clean_html(video.get('description')),
+ 'timestamp': parse_iso8601(video.get('publish_time')),
+ 'subtitles': subtitles,
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('view_count')),
+ 'is_live': is_live,
+ 'series': video.get('show_name'),
+ 'season_number': int_or_none(series_info.get('season_number')),
+ 'episode_number': int_or_none(series_info.get('episode_number')),
+ }
+
+
+class YahooSearchIE(SearchInfoExtractor):
+ IE_DESC = 'Yahoo screen search'
+ _MAX_RESULTS = 1000
+ IE_NAME = 'screen.yahoo:search'
+ _SEARCH_KEY = 'yvsearch'
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query"""
+ entries = []
+ for pagenum in itertools.count(0):
+ result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
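+ # Pagination is offset-based: page 0 requests b=0, page 1 requests b=30,
+ # i.e. 30 results per page; m['last'] / m['total'] below mark the final page.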
+ info = self._download_json(result_url, query,
+ note='Downloading results page ' + str(pagenum + 1))
+ m = info['m']
+ results = info['results']
+
+ for (i, r) in enumerate(results):
+ if (pagenum * 30) + i >= n:
+ break
+ mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
+ # skip result entries that do not carry an embedded video URL
+ if mobj:
+ entries.append(self.url_result('http://' + mobj.group('url'), 'Yahoo'))
+ if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):
+ break
+
+ return {
+ '_type': 'playlist',
+ 'id': query,
+ 'entries': entries,
+ }
+
+
+class YahooGyaOPlayerIE(InfoExtractor):
+ IE_NAME = 'yahoo:gyao:player'
+ _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:player|episode/[^/]+)|streaming\.yahoo\.co\.jp/c/y)/(?P<id>\d+/v\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://gyao.yahoo.co.jp/player/00998/v00818/v0000000000000008564/',
+ 'info_dict': {
+ 'id': '5993125228001',
+ 'ext': 'mp4',
+ 'title': 'フューリー 【字幕版】',
+ 'description': 'md5:21e691c798a15330eda4db17a8fe45a5',
+ 'uploader_id': '4235717419001',
+ 'upload_date': '20190124',
+ 'timestamp': 1548294365,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://streaming.yahoo.co.jp/c/y/01034/v00133/v0000000000000000706/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/episode/%E3%81%8D%E3%81%AE%E3%81%86%E4%BD%95%E9%A3%9F%E3%81%B9%E3%81%9F%EF%BC%9F%20%E7%AC%AC2%E8%A9%B1%202019%2F4%2F12%E6%94%BE%E9%80%81%E5%88%86/5cb02352-b725-409e-9f8d-88f947a9f682',
+ 'only_matching': True,
+ }]
+ _GEO_BYPASS = False
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url).replace('/', ':')
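+ # Path-style IDs such as 00998/v00818/v0000000000000008564 become the
+ # colon-separated contentId 00998:v00818:v0000000000000008564 expected by
+ # the playback API; bare UUIDs contain no '/' and pass through unchanged.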
+ headers = self.geo_verification_headers()
+ headers['Accept'] = 'application/json'
+ resp = self._download_json(
+ 'https://gyao.yahoo.co.jp/apis/playback/graphql', video_id, query={
+ 'appId': 'dj00aiZpPUNJeDh2cU1RazU3UCZzPWNvbnN1bWVyc2VjcmV0Jng9NTk-',
+ 'query': '''{
+ content(parameter: {contentId: "%s", logicaAgent: PC_WEB}) {
+ video {
+ delivery {
+ id
+ }
+ title
+ }
+ }
+}''' % video_id,
+ }, headers=headers)
+ content = resp['data']['content']
+ if not content:
+ msg = resp['errors'][0]['message']
+ if msg == 'not in japan':
+ self.raise_geo_restricted(countries=['JP'])
+ raise ExtractorError(msg)
+ video = content['video']
+ return {
+ '_type': 'url_transparent',
+ 'id': video_id,
+ 'title': video['title'],
+ 'url': smuggle_url(
+ 'http://players.brightcove.net/4235717419001/SyG5P0gjb_default/index.html?videoId=' + video['delivery']['id'],
+ {'geo_countries': ['JP']}),
+ 'ie_key': BrightcoveNewIE.ie_key(),
+ }
+
+
+class YahooGyaOIE(InfoExtractor):
+ IE_NAME = 'yahoo:gyao'
+ _VALID_URL = r'https?://(?:gyao\.yahoo\.co\.jp/(?:p|title(?:/[^/]+)?)|streaming\.yahoo\.co\.jp/p/y)/(?P<id>\d+/v\d+|[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+ _TESTS = [{
+ 'url': 'https://gyao.yahoo.co.jp/p/00449/v03102/',
+ 'info_dict': {
+ 'id': '00449:v03102',
+ },
+ 'playlist_count': 2,
+ }, {
+ 'url': 'https://streaming.yahoo.co.jp/p/y/01034/v00133/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/title/%E3%81%97%E3%82%83%E3%81%B9%E3%81%8F%E3%82%8A007/5b025a49-b2e5-4dc7-945c-09c6634afacf',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://gyao.yahoo.co.jp/title/5b025a49-b2e5-4dc7-945c-09c6634afacf',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ program_id = self._match_id(url).replace('/', ':')
+ videos = self._download_json(
+ 'https://gyao.yahoo.co.jp/api/programs/%s/videos' % program_id, program_id)['videos']
+ entries = []
+ for video in videos:
+ video_id = video.get('id')
+ if not video_id:
+ continue
+ entries.append(self.url_result(
+ 'https://gyao.yahoo.co.jp/player/%s/' % video_id.replace(':', '/'),
+ YahooGyaOPlayerIE.ie_key(), video_id))
+ return self.playlist_result(entries, program_id)
+
+
+class YahooJapanNewsIE(InfoExtractor):
+ IE_NAME = 'yahoo:japannews'
+ IE_DESC = 'Yahoo! Japan News'
+ _VALID_URL = r'https?://(?P<host>(?:news|headlines)\.yahoo\.co\.jp)[^\d]*(?P<id>\d[\d-]*\d)?'
+ _GEO_COUNTRIES = ['JP']
+ _TESTS = [{
+ 'url': 'https://headlines.yahoo.co.jp/videonews/ann?a=20190716-00000071-ann-int',
+ 'info_dict': {
+ 'id': '1736242',
+ 'ext': 'mp4',
+ 'title': 'ムン大統領が対日批判を強化“現金化”効果は?(テレビ朝日系(ANN)) - Yahoo!ニュース',
+ 'description': '韓国の元徴用工らを巡る裁判の原告が弁護士が差し押さえた三菱重工業の資産を売却して - Yahoo!ニュース(テレビ朝日系(ANN))',
+ 'thumbnail': r're:^https?://.*\.[a-zA-Z\d]{3,4}$',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ # geo restricted
+ 'url': 'https://headlines.yahoo.co.jp/hl?a=20190721-00000001-oxv-l04',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://headlines.yahoo.co.jp/videonews/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.yahoo.co.jp',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.yahoo.co.jp/byline/hashimotojunji/20190628-00131977/',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://news.yahoo.co.jp/feature/1356',
+ 'only_matching': True,
+ }]
+
+ def _extract_formats(self, json_data, content_id):
+ formats = []
+
+ video_data = try_get(
+ json_data,
+ lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+ list)
+ for vid in video_data or []:
+ delivery = vid.get('delivery')
+ url = url_or_none(vid.get('Url'))
+ if not delivery or not url:
+ continue
+ elif delivery == 'hls':
+ formats.extend(
+ self._extract_m3u8_formats(
+ url, content_id, 'mp4', 'm3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ formats.append({
+ 'url': url,
+ 'format_id': 'http-%s' % compat_str(vid.get('bitrate', '')),
+ 'height': int_or_none(vid.get('height')),
+ 'width': int_or_none(vid.get('width')),
+ 'tbr': int_or_none(vid.get('bitrate')),
+ })
+ self._remove_duplicate_formats(formats)
+ self._sort_formats(formats)
+
+ return formats
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ host = mobj.group('host')
+ display_id = mobj.group('id') or host
+
+ webpage = self._download_webpage(url, display_id)
+
+ title = self._html_search_meta(
+ ['og:title', 'twitter:title'], webpage, 'title', default=None
+ ) or self._html_search_regex('<title>([^<]+)</title>', webpage, 'title')
+
+ if display_id == host:
+ # Headline page (w/ multiple BC playlists) ('news.yahoo.co.jp', 'headlines.yahoo.co.jp/videonews/', ...)
+ stream_plists = re.findall(r'plist=(\d+)', webpage) or re.findall(r'plist["\']:\s*["\']([^"\']+)', webpage)
+ entries = [
+ self.url_result(
+ smuggle_url(
+ 'http://players.brightcove.net/5690807595001/HyZNerRl7_default/index.html?playlistId=%s' % plist_id,
+ {'geo_countries': ['JP']}),
+ ie='BrightcoveNew', video_id=plist_id)
+ for plist_id in stream_plists]
+ return self.playlist_result(entries, playlist_title=title)
+
+ # Article page
+ description = self._html_search_meta(
+ ['og:description', 'description', 'twitter:description'],
+ webpage, 'description', default=None)
+ thumbnail = self._og_search_thumbnail(
+ webpage, default=None) or self._html_search_meta(
+ 'twitter:image', webpage, 'thumbnail', default=None)
+ space_id = self._search_regex([
+ r'<script[^>]+class=["\']yvpub-player["\'][^>]+spaceid=([^&"\']+)',
+ r'YAHOO\.JP\.srch\.\w+link\.onLoad[^;]+spaceID["\' ]*:["\' ]+([^"\']+)',
+ r'<!--\s+SpaceID=(\d+)'
+ ], webpage, 'spaceid')
+
+ content_id = self._search_regex(
+ r'<script[^>]+class=["\']yvpub-player["\'][^>]+contentid=(?P<contentid>[^&"\']+)',
+ webpage, 'contentid', group='contentid')
+
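+ # 'ak' appears to be a request checksum: the MD5 hex digest of
+ # "<space_id>_<host>", e.g. md5(b'123_news.yahoo.co.jp').hexdigest() for a
+ # (hypothetical) space_id of '123' on news.yahoo.co.jp.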
+ json_data = self._download_json(
+ 'https://feapi-yvpub.yahooapis.jp/v1/content/%s' % content_id,
+ content_id,
+ query={
+ 'appid': 'dj0zaiZpPVZMTVFJR0FwZWpiMyZzPWNvbnN1bWVyc2VjcmV0Jng9YjU-',
+ 'output': 'json',
+ 'space_id': space_id,
+ 'domain': host,
+ 'ak': hashlib.md5('_'.join((space_id, host)).encode()).hexdigest(),
+ 'device_type': '1100',
+ })
+ formats = self._extract_formats(json_data, content_id)
+
+ return {
+ 'id': content_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/yandexdisk.py b/youtube_dlc/extractor/yandexdisk.py
index e8f6ae10f..e8f6ae10f 100644
--- a/youtube_dl/extractor/yandexdisk.py
+++ b/youtube_dlc/extractor/yandexdisk.py
diff --git a/youtube_dlc/extractor/yandexmusic.py b/youtube_dlc/extractor/yandexmusic.py
new file mode 100644
index 000000000..4358bc836
--- /dev/null
+++ b/youtube_dlc/extractor/yandexmusic.py
@@ -0,0 +1,313 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+import hashlib
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ ExtractorError,
+ int_or_none,
+ float_or_none,
+ try_get,
+)
+
+
+class YandexMusicBaseIE(InfoExtractor):
+ @staticmethod
+ def _handle_error(response):
+ if isinstance(response, dict):
+ error = response.get('error')
+ if error:
+ raise ExtractorError(error, expected=True)
+ if response.get('type') == 'captcha' or 'captcha' in response:
+ YandexMusicBaseIE._raise_captcha()
+
+ @staticmethod
+ def _raise_captcha():
+ raise ExtractorError(
+ 'YandexMusic has considered youtube-dlc requests automated and '
+ 'asks you to solve a CAPTCHA. You can either wait for some '
+ 'time until unblocked and optionally use --sleep-interval '
+ 'in future or alternatively you can go to https://music.yandex.ru/, '
+ 'solve the CAPTCHA, then export cookies and pass the cookie file to '
+ 'youtube-dlc with --cookies',
+ expected=True)
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ # super() returns a (webpage, url_handle) tuple (or False on failure),
+ # so test the captcha marker against the page content, not the tuple
+ res = super(YandexMusicBaseIE, self)._download_webpage_handle(*args, **kwargs)
+ if res is not False:
+ webpage = res[0]
+ if 'Нам очень жаль, но&nbsp;запросы, поступившие с&nbsp;вашего IP-адреса, похожи на&nbsp;автоматические.' in webpage:
+ self._raise_captcha()
+ return res
+
+ def _download_json(self, *args, **kwargs):
+ response = super(YandexMusicBaseIE, self)._download_json(*args, **kwargs)
+ self._handle_error(response)
+ return response
+
+
+class YandexMusicTrackIE(YandexMusicBaseIE):
+ IE_NAME = 'yandexmusic:track'
+ IE_DESC = 'Яндекс.Музыка - Трек'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://music.yandex.ru/album/540508/track/4878838',
+ 'md5': 'f496818aa2f60b6c0062980d2e00dc20',
+ 'info_dict': {
+ 'id': '4878838',
+ 'ext': 'mp3',
+ 'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1',
+ 'filesize': 4628061,
+ 'duration': 193.04,
+ 'track': 'Gypsy Eyes 1',
+ 'album': 'Gypsy Soul',
+ 'album_artist': 'Carlo Ambrosio',
+ 'artist': 'Carlo Ambrosio & Fabio Di Bari',
+ 'release_year': 2009,
+ },
+ 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ # multiple disks
+ 'url': 'http://music.yandex.ru/album/3840501/track/705105',
+ 'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e',
+ 'info_dict': {
+ 'id': '705105',
+ 'ext': 'mp3',
+ 'title': 'Hooverphonic - Sometimes',
+ 'filesize': 5743386,
+ 'duration': 239.27,
+ 'track': 'Sometimes',
+ 'album': 'The Best of Hooverphonic',
+ 'album_artist': 'Hooverphonic',
+ 'artist': 'Hooverphonic',
+ 'release_year': 2016,
+ 'genre': 'pop',
+ 'disc_number': 2,
+ 'track_number': 9,
+ },
+ 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ album_id, track_id = mobj.group('album_id'), mobj.group('id')
+
+ track = self._download_json(
+ 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id),
+ track_id, 'Downloading track JSON')['track']
+ track_title = track['title']
+
+ download_data = self._download_json(
+ 'https://music.yandex.ru/api/v2.1/handlers/track/%s:%s/web-album_track-track-track-main/download/m' % (track_id, album_id),
+ track_id, 'Downloading track location url JSON',
+ headers={'X-Retpath-Y': url})
+
+ fd_data = self._download_json(
+ download_data['src'], track_id,
+ 'Downloading track location JSON',
+ query={'format': 'json'})
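+ # Download-URL signing as implemented below: the key is the MD5 hex digest
+ # of a fixed salt, the storage path without its leading '/', and the 's'
+ # token from the location JSON. With hypothetical values path='/abc.mp3',
+ # s='xyz', host='h.example', ts='123', the final URL becomes
+ # http://h.example/get-mp3/<md5 hex>/123/abc.mp3?track-id=<storage id>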
+ key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest()
+ storage = track['storageDir'].split('.')
+ f_url = 'http://%s/get-mp3/%s/%s?track-id=%s' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], storage[1])
+
+ thumbnail = None
+ cover_uri = track.get('albums', [{}])[0].get('coverUri')
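+ # coverUri carries a '%%' size placeholder (e.g. 'avatars.example/cover/%%',
+ # illustrative); substituting 'orig' requests the original-size artwork.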
+ if cover_uri:
+ thumbnail = cover_uri.replace('%%', 'orig')
+ if not thumbnail.startswith('http'):
+ thumbnail = 'http://' + thumbnail
+
+ track_info = {
+ 'id': track_id,
+ 'ext': 'mp3',
+ 'url': f_url,
+ 'filesize': int_or_none(track.get('fileSize')),
+ 'duration': float_or_none(track.get('durationMs'), 1000),
+ 'thumbnail': thumbnail,
+ 'track': track_title,
+ 'acodec': download_data.get('codec'),
+ 'abr': int_or_none(download_data.get('bitrate')),
+ }
+
+ def extract_artist_name(artist):
+ decomposed = artist.get('decomposed')
+ if not isinstance(decomposed, list):
+ return artist['name']
+ parts = [artist['name']]
+ for element in decomposed:
+ if isinstance(element, dict) and element.get('name'):
+ parts.append(element['name'])
+ elif isinstance(element, compat_str):
+ parts.append(element)
+ return ''.join(parts)
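+ # Example (hypothetical): {'name': 'A', 'decomposed': [' feat. ', {'name': 'B'}]}
+ # yields 'A feat. B'; an artist without 'decomposed' yields its plain name.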
+
+ def extract_artist(artist_list):
+ if artist_list and isinstance(artist_list, list):
+ artists_names = [extract_artist_name(a) for a in artist_list if a.get('name')]
+ if artists_names:
+ return ', '.join(artists_names)
+
+ albums = track.get('albums')
+ if albums and isinstance(albums, list):
+ album = albums[0]
+ if isinstance(album, dict):
+ year = album.get('year')
+ disc_number = int_or_none(try_get(
+ album, lambda x: x['trackPosition']['volume']))
+ track_number = int_or_none(try_get(
+ album, lambda x: x['trackPosition']['index']))
+ track_info.update({
+ 'album': album.get('title'),
+ 'album_artist': extract_artist(album.get('artists')),
+ 'release_year': int_or_none(year),
+ 'genre': album.get('genre'),
+ 'disc_number': disc_number,
+ 'track_number': track_number,
+ })
+
+ track_artist = extract_artist(track.get('artists'))
+ if track_artist:
+ track_info.update({
+ 'artist': track_artist,
+ 'title': '%s - %s' % (track_artist, track_title),
+ })
+ else:
+ track_info['title'] = track_title
+
+ return track_info
+
+
+class YandexMusicPlaylistBaseIE(YandexMusicBaseIE):
+ def _build_playlist(self, tracks):
+ return [
+ self.url_result(
+ 'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id']))
+ for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)]
+
+
+class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
+ IE_NAME = 'yandexmusic:album'
+ IE_DESC = 'Яндекс.Музыка - Альбом'
+ _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)'
+
+ _TESTS = [{
+ 'url': 'http://music.yandex.ru/album/540508',
+ 'info_dict': {
+ 'id': '540508',
+ 'title': 'Carlo Ambrosio - Gypsy Soul (2009)',
+ },
+ 'playlist_count': 50,
+ 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ 'url': 'https://music.yandex.ru/album/3840501',
+ 'info_dict': {
+ 'id': '3840501',
+ 'title': 'Hooverphonic - The Best of Hooverphonic (2016)',
+ },
+ 'playlist_count': 33,
+ 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
+
+ def _real_extract(self, url):
+ album_id = self._match_id(url)
+
+ album = self._download_json(
+ 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id,
+ album_id, 'Downloading album JSON')
+
+ entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
+
+ title = '%s - %s' % (album['artists'][0]['name'], album['title'])
+ year = album.get('year')
+ if year:
+ title += ' (%s)' % year
+
+ return self.playlist_result(entries, compat_str(album['id']), title)
+
+
+class YandexMusicPlaylistIE(YandexMusicPlaylistBaseIE):
+ IE_NAME = 'yandexmusic:playlist'
+ IE_DESC = 'Яндекс.Музыка - Плейлист'
+ _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/users/(?P<user>[^/]+)/playlists/(?P<id>\d+)'
+
+ _TESTS = [{
+ 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245',
+ 'info_dict': {
+ 'id': '1245',
+ 'title': 'Что слушают Enter Shikari',
+ 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9',
+ },
+ 'playlist_count': 6,
+ 'skip': 'Travis CI servers blocked by YandexMusic',
+ }, {
+ # playlist exceeding the limit of 150 tracks shipped with webpage (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6666)
+ 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036',
+ 'info_dict': {
+ 'id': '1036',
+ 'title': 'Музыка 90-х',
+ },
+ 'playlist_mincount': 300,
+ 'skip': 'Travis CI servers blocked by YandexMusic',
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ tld = mobj.group('tld')
+ user = mobj.group('user')
+ playlist_id = mobj.group('id')
+
+ playlist = self._download_json(
+ 'https://music.yandex.%s/handlers/playlist.jsx' % tld,
+ playlist_id, 'Downloading playlist JSON',
+ fatal=False,
+ headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ 'X-Retpath-Y': url,
+ },
+ query={
+ 'owner': user,
+ 'kinds': playlist_id,
+ 'light': 'true',
+ 'lang': tld,
+ 'external-domain': 'music.yandex.%s' % tld,
+ 'overembed': 'false',
+ })['playlist']
+
+ tracks = playlist['tracks']
+ track_ids = [compat_str(track_id) for track_id in playlist['trackIds']]
+
+ # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks,
+ # missing tracks should be retrieved manually.
+ if len(tracks) < len(track_ids):
+ present_track_ids = set([
+ compat_str(track['id'])
+ for track in tracks if track.get('id')])
+ missing_track_ids = [
+ track_id for track_id in track_ids
+ if track_id not in present_track_ids]
+ missing_tracks = self._download_json(
+ 'https://music.yandex.%s/handlers/track-entries.jsx' % tld,
+ playlist_id, 'Downloading missing tracks JSON',
+ fatal=False,
+ headers={
+ 'Referer': url,
+ 'X-Requested-With': 'XMLHttpRequest',
+ },
+ query={
+ 'entries': ','.join(missing_track_ids),
+ 'lang': tld,
+ 'external-domain': 'music.yandex.%s' % tld,
+ 'overembed': 'false',
+ 'strict': 'true',
+ })
+ if missing_tracks:
+ tracks.extend(missing_tracks)
+
+ return self.playlist_result(
+ self._build_playlist(tracks),
+ compat_str(playlist_id),
+ playlist.get('title'), playlist.get('description'))
diff --git a/youtube_dl/extractor/yandexvideo.py b/youtube_dlc/extractor/yandexvideo.py
index 46529be05..46529be05 100644
--- a/youtube_dl/extractor/yandexvideo.py
+++ b/youtube_dlc/extractor/yandexvideo.py
diff --git a/youtube_dl/extractor/yapfiles.py b/youtube_dlc/extractor/yapfiles.py
index cfb368de9..cfb368de9 100644
--- a/youtube_dl/extractor/yapfiles.py
+++ b/youtube_dlc/extractor/yapfiles.py
diff --git a/youtube_dl/extractor/yesjapan.py b/youtube_dlc/extractor/yesjapan.py
index 681338c96..681338c96 100644
--- a/youtube_dl/extractor/yesjapan.py
+++ b/youtube_dlc/extractor/yesjapan.py
diff --git a/youtube_dl/extractor/yinyuetai.py b/youtube_dlc/extractor/yinyuetai.py
index 1fd8d35c6..1fd8d35c6 100644
--- a/youtube_dl/extractor/yinyuetai.py
+++ b/youtube_dlc/extractor/yinyuetai.py
diff --git a/youtube_dl/extractor/ynet.py b/youtube_dlc/extractor/ynet.py
index c4ae4d88e..c4ae4d88e 100644
--- a/youtube_dl/extractor/ynet.py
+++ b/youtube_dlc/extractor/ynet.py
diff --git a/youtube_dlc/extractor/youjizz.py b/youtube_dlc/extractor/youjizz.py
new file mode 100644
index 000000000..88aabd272
--- /dev/null
+++ b/youtube_dlc/extractor/youjizz.py
@@ -0,0 +1,95 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ parse_duration,
+ url_or_none,
+)
+
+
+class YouJizzIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]*-(?P<id>\d+)\.html|embed/(?P<embed_id>\d+))'
+ _TESTS = [{
+ 'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
+ 'md5': 'b1e1dfaa8bb9537d8b84eeda9cf4acf4',
+ 'info_dict': {
+ 'id': '2189178',
+ 'ext': 'mp4',
+ 'title': 'Zeichentrick 1',
+ 'age_limit': 18,
+ 'duration': 2874,
+ }
+ }, {
+ 'url': 'http://www.youjizz.com/videos/-2189178.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youjizz.com/videos/embed/31991001',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id') or mobj.group('embed_id')
+
+ webpage = self._download_webpage(url, video_id)
+
+ title = self._html_search_regex(
+ r'<title>(.+?)</title>', webpage, 'title')
+
+ formats = []
+
+ encodings = self._parse_json(
+ self._search_regex(
+ r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings',
+ default='[]'),
+ video_id, fatal=False)
+ for encoding in encodings:
+ if not isinstance(encoding, dict):
+ continue
+ format_url = url_or_none(encoding.get('filename'))
+ if not format_url:
+ continue
+ if determine_ext(format_url) == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+ m3u8_id='hls', fatal=False))
+ else:
+ format_id = encoding.get('name') or encoding.get('quality')
+ # format_id may be absent altogether; avoid passing None to _search_regex
+ height = int_or_none(self._search_regex(
+ r'^(\d+)[pP]', format_id or '', 'height', default=None))
+ formats.append({
+ 'url': format_url,
+ 'format_id': format_id,
+ 'height': height,
+ })
+
+ if formats:
+ info_dict = {
+ 'formats': formats,
+ }
+ else:
+ # YouJizz's HTML5 player has invalid HTML
+ webpage = webpage.replace('"controls', '" controls')
+ info_dict = self._parse_html5_media_entries(
+ url, webpage, video_id)[0]
+
+ duration = parse_duration(self._search_regex(
+ r'<strong>Runtime:</strong>([^<]+)', webpage, 'duration',
+ default=None))
+ uploader = self._search_regex(
+ r'<strong>Uploaded By:.*?<a[^>]*>([^<]+)', webpage, 'uploader',
+ default=None)
+
+ info_dict.update({
+ 'id': video_id,
+ 'title': title,
+ 'age_limit': self._rta_search(webpage),
+ 'duration': duration,
+ 'uploader': uploader,
+ })
+
+ return info_dict
diff --git a/youtube_dl/extractor/youku.py b/youtube_dlc/extractor/youku.py
index 61d1ab209..61d1ab209 100644
--- a/youtube_dl/extractor/youku.py
+++ b/youtube_dlc/extractor/youku.py
diff --git a/youtube_dl/extractor/younow.py b/youtube_dlc/extractor/younow.py
index 04dbc87fc..04dbc87fc 100644
--- a/youtube_dl/extractor/younow.py
+++ b/youtube_dlc/extractor/younow.py
diff --git a/youtube_dlc/extractor/youporn.py b/youtube_dlc/extractor/youporn.py
new file mode 100644
index 000000000..e7fca22de
--- /dev/null
+++ b/youtube_dlc/extractor/youporn.py
@@ -0,0 +1,203 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ int_or_none,
+ str_to_int,
+ unescapeHTML,
+ unified_strdate,
+ url_or_none,
+)
+from ..aes import aes_decrypt_text
+
+
+class YouPornIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?youporn\.com/(?:watch|embed)/(?P<id>\d+)(?:/(?P<display_id>[^/?#&]+))?'
+ _TESTS = [{
+ 'url': 'http://www.youporn.com/watch/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ 'md5': '3744d24c50438cf5b6f6d59feb5055c2',
+ 'info_dict': {
+ 'id': '505835',
+ 'display_id': 'sex-ed-is-it-safe-to-masturbate-daily',
+ 'ext': 'mp4',
+ 'title': 'Sex Ed: Is It Safe To Masturbate Daily?',
+ 'description': 'Love & Sex Answers: http://bit.ly/DanAndJenn -- Is It Unhealthy To Masturbate Daily?',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Ask Dan And Jennifer',
+ 'upload_date': '20101217',
+ 'average_rating': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ 'tags': list,
+ 'age_limit': 18,
+ },
+ }, {
+ # Unknown uploader
+ 'url': 'http://www.youporn.com/watch/561726/big-tits-awesome-brunette-on-amazing-webcam-show/?from=related3&al=2&from_id=561726&pos=4',
+ 'info_dict': {
+ 'id': '561726',
+ 'display_id': 'big-tits-awesome-brunette-on-amazing-webcam-show',
+ 'ext': 'mp4',
+ 'title': 'Big Tits Awesome Brunette On amazing webcam show',
+ 'description': 'http://sweetlivegirls.com Big Tits Awesome Brunette On amazing webcam show.mp4',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Unknown',
+ 'upload_date': '20110418',
+ 'average_rating': int,
+ 'view_count': int,
+ 'comment_count': int,
+ 'categories': list,
+ 'tags': list,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.youporn.com/embed/505835/sex-ed-is-it-safe-to-masturbate-daily/',
+ 'only_matching': True,
+ }, {
+ 'url': 'http://www.youporn.com/watch/505835',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_urls(webpage):
+ return re.findall(
+ r'<iframe[^>]+\bsrc=["\']((?:https?:)?//(?:www\.)?youporn\.com/embed/\d+)',
+ webpage)
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(
+ 'http://www.youporn.com/watch/%s' % video_id, display_id,
+ headers={'Cookie': 'age_verified=1'})
+
+ title = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']watchVideoTitle[^>]+>(.+?)</div>',
+ webpage, 'title', default=None) or self._og_search_title(
+ webpage, default=None) or self._html_search_meta(
+ 'title', webpage, fatal=True)
+
+ links = []
+
+ # Main source
+ definitions = self._parse_json(
+ self._search_regex(
+ r'mediaDefinition\s*=\s*(\[.+?\]);', webpage,
+ 'media definitions', default='[]'),
+ video_id, fatal=False)
+ if definitions:
+ for definition in definitions:
+ if not isinstance(definition, dict):
+ continue
+ video_url = url_or_none(definition.get('videoUrl'))
+ if video_url:
+ links.append(video_url)
+
+ # Fallback #1, this also contains extra low quality 180p format
+ for _, link in re.findall(r'<a[^>]+href=(["\'])(http.+?)\1[^>]+title=["\']Download [Vv]ideo', webpage):
+ links.append(link)
+
+ # Fallback #2 (unavailable as at 22.06.2017)
+ sources = self._search_regex(
+ r'(?s)sources\s*:\s*({.+?})', webpage, 'sources', default=None)
+ if sources:
+ for _, link in re.findall(r'[^:]+\s*:\s*(["\'])(http.+?)\1', sources):
+ links.append(link)
+
+ # Fallback #3 (unavailable as at 22.06.2017)
+ for _, link in re.findall(
+ r'(?:videoSrc|videoIpadUrl|html5PlayerSrc)\s*[:=]\s*(["\'])(http.+?)\1', webpage):
+ links.append(link)
+
+ # Fallback #4, encrypted links (unavailable as at 22.06.2017)
+ for _, encrypted_link in re.findall(
+ r'encryptedQuality\d{3,4}URL\s*=\s*(["\'])([\da-zA-Z+/=]+)\1', webpage):
+ links.append(aes_decrypt_text(encrypted_link, title, 32).decode('utf-8'))
+
+ formats = []
+ for video_url in set(unescapeHTML(link) for link in links):
+ f = {
+ 'url': video_url,
+ }
+ # Video URL's path looks like this:
+ # /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+ # /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+ # We will benefit from it by extracting some metadata
+ mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
+ if mobj:
+ height = int(mobj.group('height'))
+ bitrate = int(mobj.group('bitrate'))
+ f.update({
+ 'format_id': '%dp-%dk' % (height, bitrate),
+ 'height': height,
+ 'tbr': bitrate,
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ description = self._html_search_regex(
+ r'(?s)<div[^>]+\bid=["\']description["\'][^>]*>(.+?)</div>',
+ webpage, 'description',
+ default=None) or self._og_search_description(
+ webpage, default=None)
+ thumbnail = self._search_regex(
+ r'(?:imageurl\s*=|poster\s*:)\s*(["\'])(?P<thumbnail>.+?)\1',
+ webpage, 'thumbnail', fatal=False, group='thumbnail')
+
+ uploader = self._html_search_regex(
+ r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
+ webpage, 'uploader', fatal=False)
+ upload_date = unified_strdate(self._html_search_regex(
+ [r'Date\s+[Aa]dded:\s*<span>([^<]+)',
+ r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
+ webpage, 'upload date', fatal=False))
+
+ age_limit = self._rta_search(webpage)
+
+ average_rating = int_or_none(self._search_regex(
+ r'<div[^>]+class=["\']videoRatingPercentage["\'][^>]*>(\d+)%</div>',
+ webpage, 'average rating', fatal=False))
+
+ view_count = str_to_int(self._search_regex(
+ r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<',
+ webpage, 'view count', fatal=False, group='count'))
+ comment_count = str_to_int(self._search_regex(
+ r'>All [Cc]omments? \(([\d,.]+)\)',
+ webpage, 'comment count', fatal=False))
+
+ def extract_tag_box(regex, title):
+ tag_box = self._search_regex(regex, webpage, title, default=None)
+ if not tag_box:
+ return []
+ return re.findall(r'<a[^>]+href=[^>]+>([^<]+)', tag_box)
+
+ categories = extract_tag_box(
+ r'(?s)Categories:.*?</[^>]+>(.+?)</div>', 'categories')
+ tags = extract_tag_box(
+ r'(?s)Tags:.*?</div>\s*<div[^>]+class=["\']tagBoxContent["\'][^>]*>(.+?)</div>',
+ 'tags')
+
+ return {
+ 'id': video_id,
+ 'display_id': display_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'uploader': uploader,
+ 'upload_date': upload_date,
+ 'average_rating': average_rating,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'categories': categories,
+ 'tags': tags,
+ 'age_limit': age_limit,
+ 'formats': formats,
+ }
diff --git a/youtube_dlc/extractor/yourporn.py b/youtube_dlc/extractor/yourporn.py
new file mode 100644
index 000000000..98347491e
--- /dev/null
+++ b/youtube_dlc/extractor/yourporn.py
@@ -0,0 +1,67 @@
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ parse_duration,
+ urljoin,
+)
+
+
+class YourPornIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?sxyprn\.com/post/(?P<id>[^/?#&.]+)'
+ _TESTS = [{
+ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
+ 'md5': '6f8682b6464033d87acaa7a8ff0c092e',
+ 'info_dict': {
+ 'id': '57ffcb2e1179b',
+ 'ext': 'mp4',
+ 'title': 'md5:c9f43630bd968267672651ba905a7d35',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 165,
+ 'age_limit': 18,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://sxyprn.com/post/57ffcb2e1179b.html',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ parts = self._parse_json(
+ self._search_regex(
+ r'data-vnfo=(["\'])(?P<data>{.+?})\1', webpage, 'data info',
+ group='data'),
+ video_id)[video_id].split('/')
+
+ num = 0
+ for c in parts[6] + parts[7]:
+ if c.isdigit():
+ num += int(c)
+ parts[5] = compat_str(int(parts[5]) - num)
+ parts[1] += '8'
+ video_url = urljoin(url, '/'.join(parts))
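+ # URL deobfuscation as implemented above (values illustrative): the digit
+ # sum of path parts 6 and 7 is subtracted from part 5 (e.g. '1000' with a
+ # digit sum of 12 becomes '988') and '8' is appended to part 1 before the
+ # parts are rejoined into the final video URL.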
+
+ title = (self._search_regex(
+ r'<[^>]+\bclass=["\']PostEditTA[^>]+>([^<]+)', webpage, 'title',
+ default=None) or self._og_search_description(webpage)).strip()
+ thumbnail = self._og_search_thumbnail(webpage)
+ duration = parse_duration(self._search_regex(
+ r'duration\s*:\s*<[^>]+>([\d:]+)', webpage, 'duration',
+ default=None))
+
+ return {
+ 'id': video_id,
+ 'url': video_url,
+ 'title': title,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'age_limit': 18,
+ 'ext': 'mp4',
+ }
diff --git a/youtube_dl/extractor/yourupload.py b/youtube_dlc/extractor/yourupload.py
index 9fa772838..9fa772838 100644
--- a/youtube_dl/extractor/yourupload.py
+++ b/youtube_dlc/extractor/yourupload.py
diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py
new file mode 100644
index 000000000..70a5bd3b0
--- /dev/null
+++ b/youtube_dlc/extractor/youtube.py
@@ -0,0 +1,3445 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+
+import itertools
+import json
+import os.path
+import random
+import re
+import time
+import traceback
+
+from .common import InfoExtractor, SearchInfoExtractor
+from ..jsinterp import JSInterpreter
+from ..swfinterp import SWFInterpreter
+from ..compat import (
+ compat_chr,
+ compat_HTTPError,
+ compat_kwargs,
+ compat_parse_qs,
+ compat_urllib_parse_unquote,
+ compat_urllib_parse_unquote_plus,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse_urlparse,
+ compat_urlparse,
+ compat_str,
+)
+from ..utils import (
+ bool_or_none,
+ clean_html,
+ error_to_compat_str,
+ extract_attributes,
+ ExtractorError,
+ float_or_none,
+ get_element_by_attribute,
+ get_element_by_id,
+ int_or_none,
+ mimetype2ext,
+ orderedSet,
+ parse_codecs,
+ parse_duration,
+ remove_quotes,
+ remove_start,
+ smuggle_url,
+ str_or_none,
+ str_to_int,
+ try_get,
+ unescapeHTML,
+ unified_strdate,
+ unsmuggle_url,
+ uppercase_escape,
+ url_or_none,
+ urlencode_postdata,
+)
+
+
+class YoutubeBaseInfoExtractor(InfoExtractor):
+ """Provide base functions for Youtube extractors"""
+ _LOGIN_URL = 'https://accounts.google.com/ServiceLogin'
+ _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge'
+
+ _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup'
+ _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge'
+ _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
+
+ _NETRC_MACHINE = 'youtube'
+ # If True it will raise an error if no login info is provided
+ _LOGIN_REQUIRED = False
+
+ _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
+
+ _YOUTUBE_CLIENT_HEADERS = {
+ 'x-youtube-client-name': '1',
+ 'x-youtube-client-version': '1.20200609.04.02',
+ }
+
+ def _set_language(self):
+ self._set_cookie(
+ '.youtube.com', 'PREF', 'f1=50000000&f6=8&hl=en',
+ # YouTube sets the expire time to about two months
+ expire_time=time.time() + 2 * 30 * 24 * 3600)
+
+ def _ids_to_results(self, ids):
+ return [
+ self.url_result(vid_id, 'Youtube', video_id=vid_id)
+ for vid_id in ids]
+
+ def _login(self):
+ """
+ Attempt to log in to YouTube.
+ True is returned if successful or skipped.
+ False is returned if login failed.
+
+ If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised.
+ """
+ username, password = self._get_login_info()
+ # No authentication to be performed
+ if username is None:
+ if self._LOGIN_REQUIRED and self._downloader.params.get('cookiefile') is None:
+ raise ExtractorError('No login info available, needed for using %s.' % self.IE_NAME, expected=True)
+ return True
+
+ login_page = self._download_webpage(
+ self._LOGIN_URL, None,
+ note='Downloading login page',
+ errnote='unable to fetch login page', fatal=False)
+ if login_page is False:
+ return
+
+ login_form = self._hidden_inputs(login_page)
+
+ def req(url, f_req, note, errnote):
+ data = login_form.copy()
+ data.update({
+ 'pstMsg': 1,
+ 'checkConnection': 'youtube',
+ 'checkedDomains': 'youtube',
+ 'hl': 'en',
+ 'deviceinfo': '[null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]',
+ 'f.req': json.dumps(f_req),
+ 'flowName': 'GlifWebSignIn',
+ 'flowEntry': 'ServiceLogin',
+ # TODO: reverse actual botguard identifier generation algo
+ 'bgRequest': '["identifier",""]',
+ })
+ return self._download_json(
+ url, None, note=note, errnote=errnote,
+ transform_source=lambda s: re.sub(r'^[^[]*', '', s),
+ fatal=False,
+ data=urlencode_postdata(data), headers={
+ 'Content-Type': 'application/x-www-form-urlencoded;charset=utf-8',
+ 'Google-Accounts-XSRF': 1,
+ })
+
+ def warn(message):
+ self._downloader.report_warning(message)
+
+ lookup_req = [
+ username,
+ None, [], None, 'US', None, None, 2, False, True,
+ [
+ None, None,
+ [2, 1, None, 1,
+ 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn',
+ None, [], 4],
+ 1, [None, None, []], None, None, None, True
+ ],
+ username,
+ ]
+
+ lookup_results = req(
+ self._LOOKUP_URL, lookup_req,
+ 'Looking up account info', 'Unable to look up account info')
+
+ if lookup_results is False:
+ return False
+
+ user_hash = try_get(lookup_results, lambda x: x[0][2], compat_str)
+ if not user_hash:
+ warn('Unable to extract user hash')
+ return False
+
+ challenge_req = [
+ user_hash,
+ None, 1, None, [1, None, None, None, [password, None, True]],
+ [
+ None, None, [2, 1, None, 1, 'https://accounts.google.com/ServiceLogin?passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Fnext%3D%252F%26action_handle_signin%3Dtrue%26hl%3Den%26app%3Ddesktop%26feature%3Dsign_in_button&hl=en&service=youtube&uilel=3&requestPath=%2FServiceLogin&Page=PasswordSeparationSignIn', None, [], 4],
+ 1, [None, None, []], None, None, None, True
+ ]]
+
+ challenge_results = req(
+ self._CHALLENGE_URL, challenge_req,
+ 'Logging in', 'Unable to log in')
+
+ if challenge_results is False:
+ return
+
+ login_res = try_get(challenge_results, lambda x: x[0][5], list)
+ if login_res:
+ login_msg = try_get(login_res, lambda x: x[5], compat_str)
+ warn(
+ 'Unable to login: %s' % 'Invalid password'
+ if login_msg == 'INCORRECT_ANSWER_ENTERED' else login_msg)
+ return False
+
+ res = try_get(challenge_results, lambda x: x[0][-1], list)
+ if not res:
+ warn('Unable to extract result entry')
+ return False
+
+ login_challenge = try_get(res, lambda x: x[0][0], list)
+ if login_challenge:
+ challenge_str = try_get(login_challenge, lambda x: x[2], compat_str)
+ if challenge_str == 'TWO_STEP_VERIFICATION':
+ # SEND_SUCCESS - TFA code has been successfully sent to phone
+ # QUOTA_EXCEEDED - reached the limit of TFA codes
+ status = try_get(login_challenge, lambda x: x[5], compat_str)
+ if status == 'QUOTA_EXCEEDED':
+ warn('Exceeded the limit of TFA codes, try later')
+ return False
+
+ tl = try_get(challenge_results, lambda x: x[1][2], compat_str)
+ if not tl:
+ warn('Unable to extract TL')
+ return False
+
+ tfa_code = self._get_tfa_info('2-step verification code')
+
+ if not tfa_code:
+ warn(
+ 'Two-factor authentication required. Provide it either interactively or with --twofactor <code>. '
+ '(Note that only TOTP (Google Authenticator App) codes work at this time.)')
+ return False
+
+ tfa_code = remove_start(tfa_code, 'G-')
+
+ tfa_req = [
+ user_hash, None, 2, None,
+ [
+ 9, None, None, None, None, None, None, None,
+ [None, tfa_code, True, 2]
+ ]]
+
+ tfa_results = req(
+ self._TFA_URL.format(tl), tfa_req,
+ 'Submitting TFA code', 'Unable to submit TFA code')
+
+ if tfa_results is False:
+ return False
+
+ tfa_res = try_get(tfa_results, lambda x: x[0][5], list)
+ if tfa_res:
+ tfa_msg = try_get(tfa_res, lambda x: x[5], compat_str)
+ warn(
+ 'Unable to finish TFA: %s' % 'Invalid TFA code'
+ if tfa_msg == 'INCORRECT_ANSWER_ENTERED' else tfa_msg)
+ return False
+
+ check_cookie_url = try_get(
+ tfa_results, lambda x: x[0][-1][2], compat_str)
+ else:
+ CHALLENGES = {
+ 'LOGIN_CHALLENGE': "This device isn't recognized. For your security, Google wants to make sure it's really you.",
+ 'USERNAME_RECOVERY': 'Please provide additional information to aid in the recovery process.',
+ 'REAUTH': "There is something unusual about your activity. For your security, Google wants to make sure it's really you.",
+ }
+ challenge = CHALLENGES.get(
+ challenge_str,
+ '%s returned error %s.' % (self.IE_NAME, challenge_str))
+ warn('%s\nGo to https://accounts.google.com/, login and solve a challenge.' % challenge)
+ return False
+ else:
+ check_cookie_url = try_get(res, lambda x: x[2], compat_str)
+
+ if not check_cookie_url:
+ warn('Unable to extract CheckCookie URL')
+ return False
+
+ check_cookie_results = self._download_webpage(
+ check_cookie_url, None, 'Checking cookie', fatal=False)
+
+ if check_cookie_results is False:
+ return False
+
+ if 'https://myaccount.google.com/' not in check_cookie_results:
+ warn('Unable to log in')
+ return False
+
+ return True
+
+ def _download_webpage_handle(self, *args, **kwargs):
+ query = kwargs.get('query', {}).copy()
+ query['disable_polymer'] = 'true'
+ kwargs['query'] = query
+ return super(YoutubeBaseInfoExtractor, self)._download_webpage_handle(
+ *args, **compat_kwargs(kwargs))
+
+ def _real_initialize(self):
+ if self._downloader is None:
+ return
+ self._set_language()
+ if not self._login():
+ return
+
+
+class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
+ # Extract entries from page with "Load more" button
+ def _entries(self, page, playlist_id):
+ more_widget_html = content_html = page
+ for page_num in itertools.count(1):
+ for entry in self._process_page(content_html):
+ yield entry
+
+ mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+ if not mobj:
+ break
+
+ count = 0
+ retries = 3
+ while count <= retries:
+ try:
+ # Downloading page may result in intermittent 5xx HTTP error
+ # that is usually worked around with a retry
+ more = self._download_json(
+ 'https://www.youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'Downloading page #%s%s'
+ % (page_num, ' (retry #%d)' % count if count else ''),
+ transform_source=uppercase_escape,
+ headers=self._YOUTUBE_CLIENT_HEADERS)
+ break
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
+ count += 1
+ if count <= retries:
+ continue
+ raise
+
+ content_html = more['content_html']
+ if not content_html.strip():
+ # Some webpages show a "Load more" button but they don't
+ # have more videos
+ break
+ more_widget_html = more['load_more_widget_html']
+
+
+class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
+ def _process_page(self, content):
+ for video_id, video_title in self.extract_videos_from_page(content):
+ yield self.url_result(video_id, 'Youtube', video_id, video_title)
+
+ def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
+ for mobj in re.finditer(video_re, page):
+ # The link with index 0 is not the first video of the playlist (not sure if this still applies)
+ if 'index' in mobj.groupdict() and mobj.group('index') == '0':
+ continue
+ video_id = mobj.group('id')
+ video_title = unescapeHTML(
+ mobj.group('title')) if 'title' in mobj.groupdict() else None
+ if video_title:
+ video_title = video_title.strip()
+ if video_title == '► Play all':
+ video_title = None
+ try:
+ idx = ids_in_page.index(video_id)
+ if video_title and not titles_in_page[idx]:
+ titles_in_page[idx] = video_title
+ except ValueError:
+ ids_in_page.append(video_id)
+ titles_in_page.append(video_title)
+
+ def extract_videos_from_page(self, page):
+ ids_in_page = []
+ titles_in_page = []
+ self.extract_videos_from_page_impl(
+ self._VIDEO_RE, page, ids_in_page, titles_in_page)
+ return zip(ids_in_page, titles_in_page)
+
+
+class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor):
+ def _process_page(self, content):
+ for playlist_id in orderedSet(re.findall(
+ r'<h3[^>]+class="[^"]*yt-lockup-title[^"]*"[^>]*><a[^>]+href="/?playlist\?list=([0-9A-Za-z-_]{10,})"',
+ content)):
+ yield self.url_result(
+ 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist')
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ webpage = self._download_webpage(url, playlist_id)
+ title = self._og_search_title(webpage, fatal=False)
+ return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title)
+
+
+class YoutubeIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube.com'
+ _VALID_URL = r"""(?x)^
+ (
+ (?:https?://|//) # http(s):// or protocol-independent URL
+ (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com/|
+ (?:www\.)?deturl\.com/www\.youtube\.com/|
+ (?:www\.)?pwnyoutube\.com/|
+ (?:www\.)?hooktube\.com/|
+ (?:www\.)?yourepeat\.com/|
+ tube\.majestyc\.net/|
+ # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
+ (?:(?:www|dev)\.)?invidio\.us/|
+ (?:(?:www|no)\.)?invidiou\.sh/|
+ (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
+ (?:www\.)?invidious\.kabi\.tk/|
+ (?:www\.)?invidious\.13ad\.de/|
+ (?:www\.)?invidious\.mastodon\.host/|
+ (?:www\.)?invidious\.nixnet\.xyz/|
+ (?:www\.)?invidious\.drycat\.fr/|
+ (?:www\.)?tube\.poal\.co/|
+ (?:www\.)?vid\.wxzm\.sx/|
+ (?:www\.)?yewtu\.be/|
+ (?:www\.)?yt\.elukerio\.org/|
+ (?:www\.)?yt\.lelux\.fi/|
+ (?:www\.)?invidious\.ggc-project\.de/|
+ (?:www\.)?yt\.maisputain\.ovh/|
+ (?:www\.)?invidious\.toot\.koeln/|
+ (?:www\.)?invidious\.fdn\.fr/|
+ (?:www\.)?watch\.nettohikari\.com/|
+ (?:www\.)?kgg2m7yk5aybusll\.onion/|
+ (?:www\.)?qklhadlycap4cnod\.onion/|
+ (?:www\.)?axqzx4s6s54s32yentfqojs3x5i7faxza6xo3ehd4bzzsg2ii4fv2iid\.onion/|
+ (?:www\.)?c7hqkpkpemu6e7emz5b4vyz7idjgdvgaaa3dyimmeojqbgpea3xqjoid\.onion/|
+ (?:www\.)?fz253lmuao3strwbfbmx46yu7acac2jz27iwtorgmbqlkurlclmancad\.onion/|
+ (?:www\.)?invidious\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\.onion/|
+ (?:www\.)?owxfohz4kjyv25fvlqilyxast7inivgiktls3th44jhk3ej3i7ya\.b32\.i2p/|
+ (?:www\.)?4l2dgddgsrkf2ous66i6seeyi6etzfgrue332grh2n7madpwopotugyd\.onion/|
+ youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains
+ (?:.*?\#/)? # handle anchor (#/) redirect urls
+ (?: # the various things that can precede the ID:
+ (?:(?:v|embed|e)/(?!videoseries)) # v/ or embed/ or e/
+ |(?: # or the v= param in all its forms
+ (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx)
+ (?:\?|\#!?) # the params delimiter ? or # or #!
+ (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&amp;v=V36LpHqtcDY)
+ v=
+ )
+ ))
+ |(?:
+ youtu\.be| # just youtu.be/xxxx
+ vid\.plus| # or vid.plus/xxxx
+ zwearz\.com/watch| # or zwearz.com/watch/xxxx
+ )/
+ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=
+ )
+ )? # all until now is optional -> you can pass the naked ID
+ ([0-9A-Za-z_-]{11}) # here is it! the YouTube video ID
+ (?!.*?\blist=
+ (?:
+ %(playlist_id)s| # combined list/video URLs are handled by the playlist IE
+ WL # WL are handled by the watch later IE
+ )
+ )
+ (?(1).+)? # if we found the ID, everything can follow
+ $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+ _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
+ _PLAYER_INFO_RE = (
+ r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
+ r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
+ )
+ _formats = {
+ '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
+ '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '17': {'ext': '3gp', 'width': 176, 'height': 144, 'acodec': 'aac', 'abr': 24, 'vcodec': 'mp4v'},
+ '18': {'ext': 'mp4', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 96, 'vcodec': 'h264'},
+ '22': {'ext': 'mp4', 'width': 1280, 'height': 720, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '34': {'ext': 'flv', 'width': 640, 'height': 360, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '35': {'ext': 'flv', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ # itag 36 videos are either 320x180 (BaW_jenozKc) or 320x240 (__2ABJjxzNo), abr varies as well
+ '36': {'ext': '3gp', 'width': 320, 'acodec': 'aac', 'vcodec': 'mp4v'},
+ '37': {'ext': 'mp4', 'width': 1920, 'height': 1080, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '38': {'ext': 'mp4', 'width': 4096, 'height': 3072, 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264'},
+ '43': {'ext': 'webm', 'width': 640, 'height': 360, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '44': {'ext': 'webm', 'width': 854, 'height': 480, 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8'},
+ '45': {'ext': 'webm', 'width': 1280, 'height': 720, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '46': {'ext': 'webm', 'width': 1920, 'height': 1080, 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8'},
+ '59': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+ '78': {'ext': 'mp4', 'width': 854, 'height': 480, 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264'},
+
+
+ # 3D videos
+ '82': {'ext': 'mp4', 'height': 360, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '83': {'ext': 'mp4', 'height': 480, 'format_note': '3D', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -20},
+ '84': {'ext': 'mp4', 'height': 720, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '85': {'ext': 'mp4', 'height': 1080, 'format_note': '3D', 'acodec': 'aac', 'abr': 192, 'vcodec': 'h264', 'preference': -20},
+ '100': {'ext': 'webm', 'height': 360, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 128, 'vcodec': 'vp8', 'preference': -20},
+ '101': {'ext': 'webm', 'height': 480, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+ '102': {'ext': 'webm', 'height': 720, 'format_note': '3D', 'acodec': 'vorbis', 'abr': 192, 'vcodec': 'vp8', 'preference': -20},
+
+ # Apple HTTP Live Streaming
+ '91': {'ext': 'mp4', 'height': 144, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '92': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '93': {'ext': 'mp4', 'height': 360, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '94': {'ext': 'mp4', 'height': 480, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 128, 'vcodec': 'h264', 'preference': -10},
+ '95': {'ext': 'mp4', 'height': 720, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '96': {'ext': 'mp4', 'height': 1080, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 256, 'vcodec': 'h264', 'preference': -10},
+ '132': {'ext': 'mp4', 'height': 240, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 48, 'vcodec': 'h264', 'preference': -10},
+ '151': {'ext': 'mp4', 'height': 72, 'format_note': 'HLS', 'acodec': 'aac', 'abr': 24, 'vcodec': 'h264', 'preference': -10},
+
+ # DASH mp4 video
+ '133': {'ext': 'mp4', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '134': {'ext': 'mp4', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '138': {'ext': 'mp4', 'format_note': 'DASH video', 'vcodec': 'h264'}, # Height can vary (https://github.com/ytdl-org/youtube-dl/issues/4559)
+ '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '212': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'h264'},
+ '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '299': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'h264', 'fps': 60},
+ '266': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'h264'},
+
+ # Dash mp4 audio
+ '139': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 48, 'container': 'm4a_dash'},
+ '140': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 128, 'container': 'm4a_dash'},
+ '141': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'abr': 256, 'container': 'm4a_dash'},
+ '256': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '258': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'aac', 'container': 'm4a_dash'},
+ '325': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'dtse', 'container': 'm4a_dash'},
+ '328': {'ext': 'm4a', 'format_note': 'DASH audio', 'acodec': 'ec-3', 'container': 'm4a_dash'},
+
+ # DASH webm
+ '167': {'ext': 'webm', 'height': 360, 'width': 640, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '168': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '169': {'ext': 'webm', 'height': 720, 'width': 1280, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '170': {'ext': 'webm', 'height': 1080, 'width': 1920, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '218': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '219': {'ext': 'webm', 'height': 480, 'width': 854, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp8'},
+ '278': {'ext': 'webm', 'height': 144, 'format_note': 'DASH video', 'container': 'webm', 'vcodec': 'vp9'},
+ '242': {'ext': 'webm', 'height': 240, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '243': {'ext': 'webm', 'height': 360, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '244': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '245': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '246': {'ext': 'webm', 'height': 480, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug)
+ '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+ '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9'},
+ '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'vp9', 'fps': 60},
+
+ # DASH webm audio
+ '171': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 128},
+ '172': {'ext': 'webm', 'acodec': 'vorbis', 'format_note': 'DASH audio', 'abr': 256},
+
+ # DASH webm audio with opus inside
+ '249': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 50},
+ '250': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 70},
+ '251': {'ext': 'webm', 'format_note': 'DASH audio', 'acodec': 'opus', 'abr': 160},
+
+ # RTMP (unnamed)
+ '_rtmp': {'protocol': 'rtmp'},
+
+ # av01 video only formats sometimes served with "unknown" codecs
+ '394': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '395': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
+ }
+ _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+
+ _GEO_BYPASS = False
+
+ IE_NAME = 'youtube'
+ _TESTS = [
+ {
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9',
+ 'info_dict': {
+ 'id': 'BaW_jenozKc',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_id': 'phihag',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+ 'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+ 'channel_url': r're:https?://(?:www\.)?youtube\.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
+ 'upload_date': '20121002',
+ 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
+ 'categories': ['Science & Technology'],
+ 'tags': ['youtube-dl'],
+ 'duration': 10,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ 'start_time': 1,
+ 'end_time': 9,
+ }
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=UxxajLWwzqY',
+ 'note': 'Test generic use_cipher_signature video (#897)',
+ 'info_dict': {
+ 'id': 'UxxajLWwzqY',
+ 'ext': 'mp4',
+ 'upload_date': '20120506',
+ 'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
+ 'alt_title': 'I Love It (feat. Charli XCX)',
+ 'description': 'md5:19a2f98d9032b9311e686ed039564f63',
+ 'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
+ 'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
+ 'iconic ep', 'iconic', 'love', 'it'],
+ 'duration': 180,
+ 'uploader': 'Icona Pop',
+ 'uploader_id': 'IconaPop',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IconaPop',
+ 'creator': 'Icona Pop',
+ 'track': 'I Love It (feat. Charli XCX)',
+ 'artist': 'Icona Pop',
+ }
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=07FYdnEawAQ',
+ 'note': 'Test VEVO video with age protection (#956)',
+ 'info_dict': {
+ 'id': '07FYdnEawAQ',
+ 'ext': 'mp4',
+ 'upload_date': '20130703',
+ 'title': 'Justin Timberlake - Tunnel Vision (Official Music Video) (Explicit)',
+ 'alt_title': 'Tunnel Vision',
+ 'description': 'md5:07dab3356cde4199048e4c7cd93471e1',
+ 'duration': 419,
+ 'uploader': 'justintimberlakeVEVO',
+ 'uploader_id': 'justintimberlakeVEVO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/justintimberlakeVEVO',
+ 'creator': 'Justin Timberlake',
+ 'track': 'Tunnel Vision',
+ 'artist': 'Justin Timberlake',
+ 'age_limit': 18,
+ }
+ },
+ {
+ 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ',
+ 'note': 'Embed-only video (#1746)',
+ 'info_dict': {
+ 'id': 'yZIXLfi8CZQ',
+ 'ext': 'mp4',
+ 'upload_date': '20120608',
+ 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012',
+ 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7',
+ 'uploader': 'SET India',
+ 'uploader_id': 'setindia',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/setindia',
+ 'age_limit': 18,
+ }
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=BaW_jenozKc&v=UxxajLWwzqY',
+ 'note': 'Use the first video ID in the URL',
+ 'info_dict': {
+ 'id': 'BaW_jenozKc',
+ 'ext': 'mp4',
+ 'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+ 'uploader': 'Philipp Hagemeister',
+ 'uploader_id': 'phihag',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/phihag',
+ 'upload_date': '20121002',
+ 'description': 'test chars: "\'/\\ä↭𝕐\ntest URL: https://github.com/rg3/youtube-dl/issues/1892\n\nThis is a test video for youtube-dl.\n\nFor more information, contact phihag@phihag.de .',
+ 'categories': ['Science & Technology'],
+ 'tags': ['youtube-dl'],
+ 'duration': 10,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=a9LDPn-MO4I',
+ 'note': '256k DASH audio (format 141) via DASH manifest',
+ 'info_dict': {
+ 'id': 'a9LDPn-MO4I',
+ 'ext': 'm4a',
+ 'upload_date': '20121002',
+ 'uploader_id': '8KVIDEO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/8KVIDEO',
+ 'description': '',
+ 'uploader': '8KVIDEO',
+ 'title': 'UHDTV TEST 8K VIDEO.mp4'
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141',
+ },
+ 'skip': 'format 141 not served anymore',
+ },
+ # DASH manifest with encrypted signature
+ {
+ 'url': 'https://www.youtube.com/watch?v=IB3lcPjvWLA',
+ 'info_dict': {
+ 'id': 'IB3lcPjvWLA',
+ 'ext': 'm4a',
+ 'title': 'Afrojack, Spree Wilson - The Spark (Official Music Video) ft. Spree Wilson',
+ 'description': 'md5:8f5e2b82460520b619ccac1f509d43bf',
+ 'duration': 244,
+ 'uploader': 'AfrojackVEVO',
+ 'uploader_id': 'AfrojackVEVO',
+ 'upload_date': '20131011',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141/bestaudio[ext=m4a]',
+ },
+ },
+ # JS player signature function name containing $
+ {
+ 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
+ 'info_dict': {
+ 'id': 'nfWlot6h_JM',
+ 'ext': 'm4a',
+ 'title': 'Taylor Swift - Shake It Off',
+ 'description': 'md5:307195cd21ff7fa352270fe884570ef0',
+ 'duration': 242,
+ 'uploader': 'TaylorSwiftVEVO',
+ 'uploader_id': 'TaylorSwiftVEVO',
+ 'upload_date': '20140818',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141/bestaudio[ext=m4a]',
+ },
+ },
+ # Controversy video
+ {
+ 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
+ 'info_dict': {
+ 'id': 'T4XJQO3qol8',
+ 'ext': 'mp4',
+ 'duration': 219,
+ 'upload_date': '20100909',
+ 'uploader': 'Amazing Atheist',
+ 'uploader_id': 'TheAmazingAtheist',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
+ 'title': 'Burning Everyone\'s Koran',
+ 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
+ }
+ },
+ # Normal age-gate video (no Vevo, embed allowed)
+ {
+ 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
+ 'info_dict': {
+ 'id': 'HtVdAasjOgU',
+ 'ext': 'mp4',
+ 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
+ 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
+ 'duration': 142,
+ 'uploader': 'The Witcher',
+ 'uploader_id': 'WitcherGame',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
+ 'upload_date': '20140605',
+ 'age_limit': 18,
+ },
+ },
+ # Age-gate video with encrypted signature
+ {
+ 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
+ 'info_dict': {
+ 'id': '6kLq3WMV1nU',
+ 'ext': 'mp4',
+ 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
+ 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
+ 'duration': 246,
+ 'uploader': 'LloydVEVO',
+ 'uploader_id': 'LloydVEVO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
+ 'upload_date': '20110629',
+ 'age_limit': 18,
+ },
+ },
+ # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
+ # YouTube Red ad is not captured for creator
+ {
+ 'url': '__2ABJjxzNo',
+ 'info_dict': {
+ 'id': '__2ABJjxzNo',
+ 'ext': 'mp4',
+ 'duration': 266,
+ 'upload_date': '20100430',
+ 'uploader_id': 'deadmau5',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
+ 'creator': 'Dada Life, deadmau5',
+ 'description': 'md5:12c56784b8032162bb936a5f76d55360',
+ 'uploader': 'deadmau5',
+ 'title': 'Deadmau5 - Some Chords (HD)',
+ 'alt_title': 'This Machine Kills Some Chords',
+ },
+ 'expected_warnings': [
+ 'DASH manifest missing',
+ ]
+ },
+ # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
+ {
+ 'url': 'lqQg6PlCWgI',
+ 'info_dict': {
+ 'id': 'lqQg6PlCWgI',
+ 'ext': 'mp4',
+ 'duration': 6085,
+ 'upload_date': '20150827',
+ 'uploader_id': 'olympic',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
+ 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+ 'uploader': 'Olympic',
+ 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
+ # Non-square pixels
+ {
+ 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
+ 'info_dict': {
+ 'id': '_b-2C3KPAM0',
+ 'ext': 'mp4',
+ 'stretched_ratio': 16 / 9.,
+ 'duration': 85,
+ 'upload_date': '20110310',
+ 'uploader_id': 'AllenMeow',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
+ 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
+ 'uploader': '孫ᄋᄅ',
+ 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
+ },
+ },
+ # url_encoded_fmt_stream_map is empty string
+ {
+ 'url': 'qEJwOuvDf7I',
+ 'info_dict': {
+ 'id': 'qEJwOuvDf7I',
+ 'ext': 'webm',
+ 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+ 'description': '',
+ 'upload_date': '20150404',
+ 'uploader_id': 'spbelect',
+ 'uploader': 'Наблюдатели Петербурга',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ },
+ 'skip': 'This live event has ended.',
+ },
+ # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
+ {
+ 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
+ 'info_dict': {
+ 'id': 'FIl7x6_3R5Y',
+ 'ext': 'webm',
+ 'title': 'md5:7b81415841e02ecd4313668cde88737a',
+ 'description': 'md5:116377fd2963b81ec4ce64b542173306',
+ 'duration': 220,
+ 'upload_date': '20150625',
+ 'uploader_id': 'dorappi2000',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
+ 'uploader': 'dorappi2000',
+ 'formats': 'mincount:31',
+ },
+ 'skip': 'not actual anymore',
+ },
+ # DASH manifest with segment_list
+ {
+ 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
+ 'md5': '8ce563a1d667b599d21064e982ab9e31',
+ 'info_dict': {
+ 'id': 'CsmdDsKjzN8',
+ 'ext': 'mp4',
+ 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
+ 'uploader': 'Airtek',
+ 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
+ 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+ 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '135', # bestvideo
+ },
+ 'skip': 'This live event has ended.',
+ },
+ {
+ # Multifeed videos (multiple cameras), URL is for Main Camera
+ 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
+ 'info_dict': {
+ 'id': 'jqWvoWXjCVs',
+ 'title': 'teamPGP: Rocket League Noob Stream',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'jqWvoWXjCVs',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7335,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '6h8e8xoXJzg',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7337,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'PUOgX5z9xZw',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7337,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'teuwxikvS5k',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (zim)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7334,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ },
+ {
+ # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
+ 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
+ 'info_dict': {
+ 'id': 'gVfLd0zydlo',
+ 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
+ },
+ 'playlist_count': 2,
+ 'skip': 'Not multifeed anymore',
+ },
+ {
+ 'url': 'https://vid.plus/FlRa-iH7PGw',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
+ 'only_matching': True,
+ },
+ {
+ # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
+ # Also tests cut-off URL expansion in video description (see
+ # https://github.com/ytdl-org/youtube-dl/issues/1892,
+ # https://github.com/ytdl-org/youtube-dl/issues/8164)
+ 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
+ 'info_dict': {
+ 'id': 'lsguqyKfVQg',
+ 'ext': 'mp4',
+ 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+ 'alt_title': 'Dark Walk - Position Music',
+ 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
+ 'duration': 133,
+ 'upload_date': '20151119',
+ 'uploader_id': 'IronSoulElf',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
+ 'uploader': 'IronSoulElf',
+ 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'track': 'Dark Walk - Position Music',
+ 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
+ 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
+ 'only_matching': True,
+ },
+ {
+ # Video with yt:stretch=17:0
+ 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
+ 'info_dict': {
+ 'id': 'Q39EVAstoRM',
+ 'ext': 'mp4',
+ 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
+ 'description': 'md5:ee18a25c350637c8faff806845bddee9',
+ 'upload_date': '20151107',
+ 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
+ 'uploader': 'CH GAMER DROID',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video does not exist.',
+ },
+ {
+ # Video licensed under Creative Commons
+ 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
+ 'info_dict': {
+ 'id': 'M4gD1WSo5mA',
+ 'ext': 'mp4',
+ 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
+ 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
+ 'duration': 721,
+ 'upload_date': '20150127',
+ 'uploader_id': 'BerkmanCenter',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
+ 'uploader': 'The Berkman Klein Center for Internet & Society',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Channel-like uploader_url
+ 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
+ 'info_dict': {
+ 'id': 'eQcmzGIKrzg',
+ 'ext': 'mp4',
+ 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
+ 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
+ 'duration': 4060,
+ 'upload_date': '20151119',
+ 'uploader': 'Bernie Sanders',
+ 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;amp;v=V36LpHqtcDY',
+ 'only_matching': True,
+ },
+ {
+ # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
+ 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
+ 'only_matching': True,
+ },
+ {
+ # Rental video preview
+ 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
+ 'info_dict': {
+ 'id': 'uGpuVWrhIzE',
+ 'ext': 'mp4',
+ 'title': 'Piku - Trailer',
+ 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
+ 'upload_date': '20150811',
+ 'uploader': 'FlixMatrix',
+ 'uploader_id': 'FlixMatrixKaravan',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
+ 'license': 'Standard YouTube License',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ },
+ {
+ # YouTube Red video with episode data
+ 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
+ 'info_dict': {
+ 'id': 'iqKdEhx-dD4',
+ 'ext': 'mp4',
+ 'title': 'Isolation - Mind Field (Ep 1)',
+ 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
+ 'duration': 2085,
+ 'upload_date': '20170118',
+ 'uploader': 'Vsauce',
+ 'uploader_id': 'Vsauce',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
+ 'series': 'Mind Field',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Skipping DASH manifest',
+ ],
+ },
+ {
+ # The following content has been identified by the YouTube community
+ # as inappropriate or offensive to some audiences.
+ 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+ 'info_dict': {
+ 'id': '6SJNVb0GnPI',
+ 'ext': 'mp4',
+ 'title': 'Race Differences in Intelligence',
+ 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+ 'duration': 965,
+ 'upload_date': '20140124',
+ 'uploader': 'New Century Foundation',
+ 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # itag 212
+ 'url': '1t24XAntNCY',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to JP
+ 'url': 'sJL6WA-aGkQ',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ },
+ {
+ # DRM protected
+ 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
+ 'only_matching': True,
+ },
+ {
+ # Video with unsupported adaptive stream type formats
+ 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
+ 'info_dict': {
+ 'id': 'Z4Vy8R84T1U',
+ 'ext': 'mp4',
+ 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 433,
+ 'upload_date': '20130923',
+ 'uploader': 'Amelia Putri Harwita',
+ 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
+ 'formats': 'maxcount:10',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'youtube_include_dash_manifest': False,
+ },
+ 'skip': 'not actual anymore',
+ },
+ {
+ # Youtube Music Auto-generated description
+ 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+ 'info_dict': {
+ 'id': 'MgNrAu2pzNs',
+ 'ext': 'mp4',
+ 'title': 'Voyeur Girl',
+ 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
+ 'upload_date': '20190312',
+ 'uploader': 'Stephen - Topic',
+ 'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
+ 'artist': 'Stephen',
+ 'track': 'Voyeur Girl',
+ 'album': 'it\'s too much love to know my dear',
+ 'release_date': '20190313',
+ 'release_year': 2019,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # Retrieve the 'artist' field from 'Artist:' in the video description
+ # when it is present on a YouTube Music video
+ 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
+ 'info_dict': {
+ 'id': 'k0jLE7tTwjY',
+ 'ext': 'mp4',
+ 'title': 'Latch Feat. Sam Smith',
+ 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
+ 'upload_date': '20150110',
+ 'uploader': 'Various Artists - Topic',
+ 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
+ 'artist': 'Disclosure',
+ 'track': 'Latch Feat. Sam Smith',
+ 'album': 'Latch Featuring Sam Smith',
+ 'release_date': '20121008',
+ 'release_year': 2012,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # handle multiple artists on a YouTube Music video
+ 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
+ 'info_dict': {
+ 'id': '74qn0eJSjpA',
+ 'ext': 'mp4',
+ 'title': 'Eastside',
+ 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
+ 'upload_date': '20180710',
+ 'uploader': 'Benny Blanco - Topic',
+ 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
+ 'artist': 'benny blanco, Halsey, Khalid',
+ 'track': 'Eastside',
+ 'album': 'Eastside',
+ 'release_date': '20180713',
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # handle a YouTube Music video with release_year and no release_date
+ 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
+ 'info_dict': {
+ 'id': '-hcAI0g-f5M',
+ 'ext': 'mp4',
+ 'title': 'Put It On Me',
+ 'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
+ 'upload_date': '20180426',
+ 'uploader': 'Matt Maeson - Topic',
+ 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
+ 'artist': 'Matt Maeson',
+ 'track': 'Put It On Me',
+ 'album': 'The Hearse',
+ 'release_date': None,
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtubekids.com/watch?v=3b8nCWDgZ6Q',
+ 'only_matching': True,
+ },
+ {
+ # invalid -> valid video id redirection
+ 'url': 'DJztXj2GPfl',
+ 'info_dict': {
+ 'id': 'DJztXj2GPfk',
+ 'ext': 'mp4',
+ 'title': 'Panjabi MC - Mundian To Bach Ke (The Dictator Soundtrack)',
+ 'description': 'md5:bf577a41da97918e94fa9798d9228825',
+ 'upload_date': '20090125',
+ 'uploader': 'Prochorowka',
+ 'uploader_id': 'Prochorowka',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Prochorowka',
+ 'artist': 'Panjabi MC',
+ 'track': 'Beware of the Boys (Mundian to Bach Ke) - Motivo Hi-Lectro Remix',
+ 'album': 'Beware of the Boys (Mundian To Bach Ke)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }
+ ]
+
+ def __init__(self, *args, **kwargs):
+ super(YoutubeIE, self).__init__(*args, **kwargs)
+ self._player_cache = {}
+
+ def report_video_info_webpage_download(self, video_id):
+ """Report attempt to download video info webpage."""
+ self.to_screen('%s: Downloading video info webpage' % video_id)
+
+ def report_information_extraction(self, video_id):
+ """Report attempt to extract video information."""
+ self.to_screen('%s: Extracting video information' % video_id)
+
+ def report_unavailable_format(self, video_id, format):
+ """Report extracted video URL."""
+ self.to_screen('%s: Format %s not available' % (video_id, format))
+
+ def report_rtmp_download(self):
+ """Indicate the download will use the RTMP protocol."""
+ self.to_screen('RTMP download detected')
+
+ def _signature_cache_id(self, example_sig):
+ """ Return a string representation of a signature """
+ return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+
+ @classmethod
+ def _extract_player_info(cls, player_url):
+ for player_re in cls._PLAYER_INFO_RE:
+ id_m = re.search(player_re, player_url)
+ if id_m:
+ break
+ else:
+ raise ExtractorError('Cannot identify player %r' % player_url)
+ return id_m.group('ext'), id_m.group('id')
+
+ def _extract_signature_function(self, video_id, player_url, example_sig):
+ player_type, player_id = self._extract_player_info(player_url)
+
+ # Read from filesystem cache
+ func_id = '%s_%s_%s' % (
+ player_type, player_id, self._signature_cache_id(example_sig))
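+ # e.g. 'js_vflABC123_5.3.7' (hypothetical player id and signature pattern)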
+ assert os.path.basename(func_id) == func_id
+
+ cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
+ if cache_spec is not None:
+ return lambda s: ''.join(s[i] for i in cache_spec)
+
+ download_note = (
+ 'Downloading player %s' % player_url
+ if self._downloader.params.get('verbose') else
+ 'Downloading %s player %s' % (player_type, player_id)
+ )
+ if player_type == 'js':
+ code = self._download_webpage(
+ player_url, video_id,
+ note=download_note,
+ errnote='Download of %s failed' % player_url)
+ res = self._parse_sig_js(code)
+ elif player_type == 'swf':
+ urlh = self._request_webpage(
+ player_url, video_id,
+ note=download_note,
+ errnote='Download of %s failed' % player_url)
+ code = urlh.read()
+ res = self._parse_sig_swf(code)
+ else:
+ assert False, 'Invalid player type %r' % player_type
+
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = res(test_string)
+ cache_spec = [ord(c) for c in cache_res]
+
+ self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
+ return res
+
+ def _print_sig_code(self, func, example_sig):
+ def gen_sig_code(idxs):
+ def _genslice(start, end, step):
+ starts = '' if start == 0 else str(start)
+ ends = (':%d' % (end + step)) if end + step >= 0 else ':'
+ steps = '' if step == 1 else (':%d' % step)
+ return 's[%s%s%s]' % (starts, ends, steps)
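+ # e.g. _genslice(2, 8, 3) yields 's[2:11:3]' (illustrative values)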
+
+ step = None
+ # Squelch pyflakes warnings - start will be set when step is set
+ start = '(Never used)'
+ for i, prev in zip(idxs[1:], idxs[:-1]):
+ if step is not None:
+ if i - prev == step:
+ continue
+ yield _genslice(start, prev, step)
+ step = None
+ continue
+ if i - prev in [-1, 1]:
+ step = i - prev
+ start = prev
+ continue
+ else:
+ yield 's[%d]' % prev
+ if step is None:
+ yield 's[%d]' % i
+ else:
+ yield _genslice(start, i, step)
+
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = func(test_string)
+ cache_spec = [ord(c) for c in cache_res]
+ expr_code = ' + '.join(gen_sig_code(cache_spec))
+ signature_id_tuple = '(%s)' % (
+ ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+ ' return %s\n') % (signature_id_tuple, expr_code)
+ self.to_screen('Extracted signature function:\n' + code)
+
+ def _parse_sig_js(self, jscode):
+ funcname = self._search_regex(
+ (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+ r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+ # Obsolete patterns
+ r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+ jscode, 'Initial JS player signature function name', group='sig')
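+ # The regexes above target player code that looks roughly like
+ # c&&d.set(h.sp,encodeURIComponent(Xy(decodeURIComponent(c)))) --
+ # an illustrative snippet; the actual function name ('Xy' here) varies
+ # from player to player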
+
+ jsi = JSInterpreter(jscode)
+ initial_function = jsi.extract_function(funcname)
+ return lambda s: initial_function([s])
+
+ def _parse_sig_swf(self, file_contents):
+ swfi = SWFInterpreter(file_contents)
+ TARGET_CLASSNAME = 'SignatureDecipher'
+ searched_class = swfi.extract_class(TARGET_CLASSNAME)
+ initial_function = swfi.extract_function(searched_class, 'decipher')
+ return lambda s: initial_function([s])
+
+ def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
+ """Turn the encrypted s field into a working signature"""
+
+ if player_url is None:
+ raise ExtractorError('Cannot decrypt signature without player_url')
+
+ if player_url.startswith('//'):
+ player_url = 'https:' + player_url
+ elif not re.match(r'https?://', player_url):
+ player_url = compat_urlparse.urljoin(
+ 'https://www.youtube.com', player_url)
+ try:
+ player_id = (player_url, self._signature_cache_id(s))
+ if player_id not in self._player_cache:
+ func = self._extract_signature_function(
+ video_id, player_url, s
+ )
+ self._player_cache[player_id] = func
+ func = self._player_cache[player_id]
+ if self._downloader.params.get('youtube_print_sig_code'):
+ self._print_sig_code(func, s)
+ return func(s)
+ except Exception as e:
+ tb = traceback.format_exc()
+ raise ExtractorError(
+ 'Signature extraction failed: ' + tb, cause=e)
+
+ def _get_subtitles(self, video_id, webpage, has_live_chat_replay):
+ try:
+ subs_doc = self._download_xml(
+ 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
+ video_id, note=False)
+ except ExtractorError as err:
+ self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
+ return {}
+
+ sub_lang_list = {}
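+ # the timedtext listing is XML whose <track> elements carry lang_code
+ # and name attributes (shape inferred from the parsing below)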
+ for track in subs_doc.findall('track'):
+ lang = track.attrib['lang_code']
+ if lang in sub_lang_list:
+ continue
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ params = compat_urllib_parse_urlencode({
+ 'lang': lang,
+ 'v': video_id,
+ 'fmt': ext,
+ 'name': track.attrib['name'].encode('utf-8'),
+ })
+ sub_formats.append({
+ 'url': 'https://www.youtube.com/api/timedtext?' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[lang] = sub_formats
+ if has_live_chat_replay:
+ sub_lang_list['live_chat'] = [
+ {
+ 'video_id': video_id,
+ 'ext': 'json',
+ 'protocol': 'youtube_live_chat_replay',
+ },
+ ]
+ if not sub_lang_list:
+ self._downloader.report_warning('video doesn\'t have subtitles')
+ return {}
+ return sub_lang_list
+
+ def _get_ytplayer_config(self, video_id, webpage):
+ patterns = (
+ # User data may contain arbitrary character sequences that break JSON
+ # extraction with a regex, e.g. when '};' occurs inside a value the
+ # second regex won't capture the whole JSON. Work around this by trying
+ # the more specific regex first; proper quoted-string handling, to be
+ # implemented in the future, will replace this workaround (see
+ # https://github.com/ytdl-org/youtube-dl/issues/7468,
+ # https://github.com/ytdl-org/youtube-dl/pull/7599)
+ r';ytplayer\.config\s*=\s*({.+?});ytplayer',
+ r';ytplayer\.config\s*=\s*({.+?});',
+ )
+ config = self._search_regex(
+ patterns, webpage, 'ytplayer.config', default=None)
+ if config:
+ return self._parse_json(
+ uppercase_escape(config), video_id, fatal=False)
+
+ def _get_yt_initial_data(self, video_id, webpage):
+ config = self._search_regex(
+ (r'window\["ytInitialData"\]\s*=\s*(.*?)(?<=});',
+ r'var\s+ytInitialData\s*=\s*(.*?)(?<=});'),
+ webpage, 'ytInitialData', default=None)
+ if config:
+ return self._parse_json(
+ uppercase_escape(config), video_id, fatal=False)
+
+ def _get_automatic_captions(self, video_id, webpage):
+ """We need the webpage for getting the captions url, pass it as an
+ argument to speed up the process."""
+ self.to_screen('%s: Looking for automatic captions' % video_id)
+ player_config = self._get_ytplayer_config(video_id, webpage)
+ err_msg = 'Couldn\'t find automatic captions for %s' % video_id
+ if not player_config:
+ self._downloader.report_warning(err_msg)
+ return {}
+ try:
+ args = player_config['args']
+ caption_url = args.get('ttsurl')
+ if caption_url:
+ timestamp = args['timestamp']
+ # We get the available subtitles
+ list_params = compat_urllib_parse_urlencode({
+ 'type': 'list',
+ 'tlangs': 1,
+ 'asrs': 1,
+ })
+ list_url = caption_url + '&' + list_params
+ caption_list = self._download_xml(list_url, video_id)
+ original_lang_node = caption_list.find('track')
+ if original_lang_node is None:
+ self._downloader.report_warning('Video doesn\'t have automatic captions')
+ return {}
+ original_lang = original_lang_node.attrib['lang_code']
+ caption_kind = original_lang_node.attrib.get('kind', '')
+
+ sub_lang_list = {}
+ for lang_node in caption_list.findall('target'):
+ sub_lang = lang_node.attrib['lang_code']
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ params = compat_urllib_parse_urlencode({
+ 'lang': original_lang,
+ 'tlang': sub_lang,
+ 'fmt': ext,
+ 'ts': timestamp,
+ 'kind': caption_kind,
+ })
+ sub_formats.append({
+ 'url': caption_url + '&' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[sub_lang] = sub_formats
+ return sub_lang_list
+
+ def make_captions(sub_url, sub_langs):
+ parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
+ caption_qs = compat_parse_qs(parsed_sub_url.query)
+ captions = {}
+ for sub_lang in sub_langs:
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ caption_qs.update({
+ 'tlang': [sub_lang],
+ 'fmt': [ext],
+ })
+ sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
+ query=compat_urllib_parse_urlencode(caption_qs, True)))
+ sub_formats.append({
+ 'url': sub_url,
+ 'ext': ext,
+ })
+ captions[sub_lang] = sub_formats
+ return captions
+
+ # New captions format as of 22.06.2017
+ player_response = args.get('player_response')
+ if player_response and isinstance(player_response, compat_str):
+ player_response = self._parse_json(
+ player_response, video_id, fatal=False)
+ if player_response:
+ renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+ base_url = renderer['captionTracks'][0]['baseUrl']
+ sub_lang_list = []
+ for lang in renderer['translationLanguages']:
+ lang_code = lang.get('languageCode')
+ if lang_code:
+ sub_lang_list.append(lang_code)
+ return make_captions(base_url, sub_lang_list)
+
+ # Some videos don't provide ttsurl but rather caption_tracks and
+ # caption_translation_languages (e.g. 20LmZk1hakA)
+ # Not used anymore as of 22.06.2017
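+ # caption_tracks is a comma-separated list of query strings, each
+ # carrying the caption URL in its 'u' parameter, e.g.
+ # 'u=https%3A%2F%2Fwww.youtube.com%2Fapi%2Ftimedtext...&...' (illustrative)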
+ caption_tracks = args['caption_tracks']
+ caption_translation_languages = args['caption_translation_languages']
+ caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
+ sub_lang_list = []
+ for lang in caption_translation_languages.split(','):
+ lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
+ sub_lang = lang_qs.get('lc', [None])[0]
+ if sub_lang:
+ sub_lang_list.append(sub_lang)
+ return make_captions(caption_url, sub_lang_list)
+ # An extractor error can be raised by the download process if there
+ # are no automatic captions but there are subtitles
+ except (KeyError, IndexError, ExtractorError):
+ self._downloader.report_warning(err_msg)
+ return {}
+
+ def _mark_watched(self, video_id, video_info, player_response):
+ playback_url = url_or_none(try_get(
+ player_response,
+ lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
+ video_info, lambda x: x['videostats_playback_base_url'][0]))
+ if not playback_url:
+ return
+ parsed_playback_url = compat_urlparse.urlparse(playback_url)
+ qs = compat_urlparse.parse_qs(parsed_playback_url.query)
+
+ # The cpn generation algorithm is reverse engineered from base.js;
+ # in fact it works even with a dummy cpn.
+ CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
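+ # CPN_ALPHABET has 64 characters, so '& 63' always yields a valid index;
+ # note randint(0, 256) is inclusive, so 256 maps back to index 0 -- a
+ # tiny bias that is harmless for a dummy cpn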
+ cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
+
+ qs.update({
+ 'ver': ['2'],
+ 'cpn': [cpn],
+ })
+ playback_url = compat_urlparse.urlunparse(
+ parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+ self._download_webpage(
+ playback_url, video_id, 'Marking watched',
+ 'Unable to mark watched', fatal=False)
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Embedded YouTube player
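+ # matches markup such as
+ # <iframe src="https://www.youtube.com/embed/XXXXXXXXXXX"></iframe>
+ # where XXXXXXXXXXX stands for an 11-character video id (illustrative)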
+ entries = [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
+ \1''', webpage)]
+
+ # lazyYT YouTube embed
+ entries.extend(list(map(
+ unescapeHTML,
+ re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+ # Wordpress "YouTube Video Importer" plugin
+ matches = re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+ entries.extend(m[-1] for m in matches)
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = YoutubeIE._extract_urls(webpage)
+ return urls[0] if urls else None
+
+ @classmethod
+ def extract_id(cls, url):
+ mobj = re.match(cls._VALID_URL, url, re.VERBOSE)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ video_id = mobj.group(2)
+ return video_id
+
+ def _extract_chapters_from_json(self, webpage, video_id, duration):
+ if not webpage:
+ return
+ initial_data = self._parse_json(
+ self._search_regex(
+ r'window\["ytInitialData"\] = (.+);\n', webpage,
+ 'ytInitialData', default='{}'),
+ video_id, fatal=False)
+ if not initial_data or not isinstance(initial_data, dict):
+ return
+ chapters_list = try_get(
+ initial_data,
+ lambda x: x['playerOverlays']
+ ['playerOverlayRenderer']
+ ['decoratedPlayerBarRenderer']
+ ['decoratedPlayerBarRenderer']
+ ['playerBar']
+ ['chapteredPlayerBarRenderer']
+ ['chapters'],
+ list)
+ if not chapters_list:
+ return
+
+ def chapter_time(chapter):
+ return float_or_none(
+ try_get(
+ chapter,
+ lambda x: x['chapterRenderer']['timeRangeStartMillis'],
+ int),
+ scale=1000)
+ chapters = []
+ for next_num, chapter in enumerate(chapters_list, start=1):
+ start_time = chapter_time(chapter)
+ if start_time is None:
+ continue
+ end_time = (chapter_time(chapters_list[next_num])
+ if next_num < len(chapters_list) else duration)
+ if end_time is None:
+ continue
+ title = try_get(
+ chapter, lambda x: x['chapterRenderer']['title']['simpleText'],
+ compat_str)
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': title,
+ })
+ return chapters
+
+ @staticmethod
+ def _extract_chapters_from_description(description, duration):
+ if not description:
+ return None
+ chapter_lines = re.findall(
+ r'(?:^|<br\s*/>)([^<]*<a[^>]+onclick=["\']yt\.www\.watch\.player\.seekTo[^>]+>(\d{1,2}:\d{1,2}(?::\d{1,2})?)</a>[^>]*)(?=$|<br\s*/>)',
+ description)
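+ # a matching description line looks roughly like (illustrative):
+ # Intro <a href="#" onclick="yt.www.watch.player.seekTo(0,0)">0:00</a>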
+ if not chapter_lines:
+ return None
+ chapters = []
+ for next_num, (chapter_line, time_point) in enumerate(
+ chapter_lines, start=1):
+ start_time = parse_duration(time_point)
+ if start_time is None:
+ continue
+ if start_time > duration:
+ break
+ end_time = (duration if next_num == len(chapter_lines)
+ else parse_duration(chapter_lines[next_num][1]))
+ if end_time is None:
+ continue
+ if end_time > duration:
+ end_time = duration
+ if start_time > end_time:
+ break
+ chapter_title = re.sub(
+ r'<a[^>]+>[^<]+</a>', '', chapter_line).strip(' \t-')
+ chapter_title = re.sub(r'\s+', ' ', chapter_title)
+ chapters.append({
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'title': chapter_title,
+ })
+ return chapters
+
+ def _extract_chapters(self, webpage, description, video_id, duration):
+ return (self._extract_chapters_from_json(webpage, video_id, duration)
+ or self._extract_chapters_from_description(description, duration))
+
+ def _real_extract(self, url):
+ url, smuggled_data = unsmuggle_url(url, {})
+
+ proto = (
+ 'http' if self._downloader.params.get('prefer_insecure', False)
+ else 'https')
+
+ start_time = None
+ end_time = None
+ parsed_url = compat_urllib_parse_urlparse(url)
+ for component in [parsed_url.fragment, parsed_url.query]:
+ query = compat_parse_qs(component)
+ if start_time is None and 't' in query:
+ start_time = parse_duration(query['t'][0])
+ if start_time is None and 'start' in query:
+ start_time = parse_duration(query['start'][0])
+ if end_time is None and 'end' in query:
+ end_time = parse_duration(query['end'][0])
+
+ # Extract the original video URL from a redirecting URL (e.g. age verification) using the next_url parameter
+ mobj = re.search(self._NEXT_URL_RE, url)
+ if mobj:
+ url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/')
+ video_id = self.extract_id(url)
+
+ # Get video webpage
+ url = proto + '://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1&bpctr=9999999999' % video_id
+ video_webpage, urlh = self._download_webpage_handle(url, video_id)
+
+ qs = compat_parse_qs(compat_urllib_parse_urlparse(urlh.geturl()).query)
+ video_id = qs.get('v', [None])[0] or video_id
+
+ # Attempt to extract SWF player URL
+ mobj = re.search(r'swfConfig.*?"(https?:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
+ if mobj is not None:
+ player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
+ else:
+ player_url = None
+
+ dash_mpds = []
+
+ def add_dash_mpd(video_info):
+ dash_mpd = video_info.get('dashmpd')
+ if dash_mpd and dash_mpd[0] not in dash_mpds:
+ dash_mpds.append(dash_mpd[0])
+
+ def add_dash_mpd_pr(pl_response):
+ dash_mpd = url_or_none(try_get(
+ pl_response, lambda x: x['streamingData']['dashManifestUrl'],
+ compat_str))
+ if dash_mpd and dash_mpd not in dash_mpds:
+ dash_mpds.append(dash_mpd)
+
+ is_live = None
+ view_count = None
+
+ def extract_view_count(v_info):
+ return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+
+ def extract_player_response(player_response, video_id):
+ pl_response = str_or_none(player_response)
+ if not pl_response:
+ return
+ pl_response = self._parse_json(pl_response, video_id, fatal=False)
+ if isinstance(pl_response, dict):
+ add_dash_mpd_pr(pl_response)
+ return pl_response
+
+ player_response = {}
+
+ # Get video info
+ video_info = {}
+ embed_webpage = None
+ if self._html_search_meta('og:restrictions:age', video_webpage, default=None) == "18+":
+ age_gate = True
+ # We simulate access to the video from www.youtube.com/v/{video_id};
+ # this page can be viewed without logging in to YouTube
+ url = proto + '://www.youtube.com/embed/%s' % video_id
+ embed_webpage = self._download_webpage(url, video_id, 'Downloading embed webpage')
+ data = compat_urllib_parse_urlencode({
+ 'video_id': video_id,
+ 'eurl': 'https://youtube.googleapis.com/v/' + video_id,
+ 'sts': self._search_regex(
+ r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
+ })
+ video_info_url = proto + '://www.youtube.com/get_video_info?' + data
+ try:
+ video_info_webpage = self._download_webpage(
+ video_info_url, video_id,
+ note='Refetching age-gated info webpage',
+ errnote='unable to download video info webpage')
+ except ExtractorError:
+ video_info_webpage = None
+ if video_info_webpage:
+ video_info = compat_parse_qs(video_info_webpage)
+ pl_response = video_info.get('player_response', [None])[0]
+ player_response = extract_player_response(pl_response, video_id)
+ add_dash_mpd(video_info)
+ view_count = extract_view_count(video_info)
+ else:
+ age_gate = False
+ # Try looking directly into the video webpage
+ ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
+ if ytplayer_config:
+ args = ytplayer_config['args']
+ if args.get('url_encoded_fmt_stream_map') or args.get('hlsvp'):
+ # Convert to the same format returned by compat_parse_qs
+ video_info = dict((k, [v]) for k, v in args.items())
+ add_dash_mpd(video_info)
+ # Rental video that is not rented yet but has a preview available (e.g.
+ # https://www.youtube.com/watch?v=yYr8q0y5Jfg,
+ # https://github.com/ytdl-org/youtube-dl/issues/10532)
+ if not video_info and args.get('ypc_vid'):
+ return self.url_result(
+ args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
+ if args.get('livestream') == '1' or args.get('live_playback') == 1:
+ is_live = True
+ if not player_response:
+ player_response = extract_player_response(args.get('player_response'), video_id)
+ if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
+ add_dash_mpd_pr(player_response)
+
+ def extract_unavailable_message():
+ messages = []
+ for tag, kind in (('h1', 'message'), ('div', 'submessage')):
+ msg = self._html_search_regex(
+ r'(?s)<{tag}[^>]+id=["\']unavailable-{kind}["\'][^>]*>(.+?)</{tag}>'.format(tag=tag, kind=kind),
+ video_webpage, 'unavailable %s' % kind, default=None)
+ if msg:
+ messages.append(msg)
+ if messages:
+ return '\n'.join(messages)
+
+ if not video_info and not player_response:
+ unavailable_message = extract_unavailable_message()
+ if not unavailable_message:
+ unavailable_message = 'Unable to extract video data'
+ raise ExtractorError(
+ 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
+
+ if not isinstance(video_info, dict):
+ video_info = {}
+
+ video_details = try_get(
+ player_response, lambda x: x['videoDetails'], dict) or {}
+
+ microformat = try_get(
+ player_response, lambda x: x['microformat']['playerMicroformatRenderer'], dict) or {}
+
+ video_title = video_info.get('title', [None])[0] or video_details.get('title')
+ if not video_title:
+ self._downloader.report_warning('Unable to extract video title')
+ video_title = '_'
+
+ description_original = video_description = get_element_by_id("eow-description", video_webpage)
+ if video_description:
+
+ def replace_url(m):
+ redir_url = compat_urlparse.urljoin(url, m.group(1))
+ parsed_redir_url = compat_urllib_parse_urlparse(redir_url)
+ if re.search(r'^(?:www\.)?(?:youtube(?:-nocookie)?\.com|youtu\.be)$', parsed_redir_url.netloc) and parsed_redir_url.path == '/redirect':
+ qs = compat_parse_qs(parsed_redir_url.query)
+ q = qs.get('q')
+ if q and q[0]:
+ return q[0]
+ return redir_url
+
+ description_original = video_description = re.sub(r'''(?x)
+ <a\s+
+ (?:[a-zA-Z-]+="[^"]*"\s+)*?
+ (?:title|href)="([^"]+)"\s+
+ (?:[a-zA-Z-]+="[^"]*"\s+)*?
+ class="[^"]*"[^>]*>
+ [^<]+\.{3}\s*
+ </a>
+ ''', replace_url, video_description)
+ video_description = clean_html(video_description)
+ else:
+ video_description = video_details.get('shortDescription') or self._html_search_meta('description', video_webpage)
+
+ if not smuggled_data.get('force_singlefeed', False):
+ if not self._downloader.params.get('noplaylist'):
+ multifeed_metadata_list = try_get(
+ player_response,
+ lambda x: x['multicamera']['playerLegacyMulticameraRenderer']['metadataList'],
+ compat_str) or try_get(
+ video_info, lambda x: x['multifeed_metadata_list'][0], compat_str)
+ if multifeed_metadata_list:
+ entries = []
+ feed_ids = []
+ for feed in multifeed_metadata_list.split(','):
+ # Each entry in the list is individually percent-encoded, so the split
+ # on comma (,) must happen before unquoting: textual fields may
+ # themselves contain commas (see
+ # https://github.com/ytdl-org/youtube-dl/issues/8536)
+ feed_data = compat_parse_qs(compat_urllib_parse_unquote_plus(feed))
+
+ def feed_entry(name):
+ return try_get(feed_data, lambda x: x[name][0], compat_str)
+
+ feed_id = feed_entry('id')
+ if not feed_id:
+ continue
+ feed_title = feed_entry('title')
+ title = video_title
+ if feed_title:
+ title += ' (%s)' % feed_title
+ entries.append({
+ '_type': 'url_transparent',
+ 'ie_key': 'Youtube',
+ 'url': smuggle_url(
+ '%s://www.youtube.com/watch?v=%s' % (proto, feed_data['id'][0]),
+ {'force_singlefeed': True}),
+ 'title': title,
+ })
+ feed_ids.append(feed_id)
+ self.to_screen(
+ 'Downloading multifeed video (%s) - add --no-playlist to just download video %s'
+ % (', '.join(feed_ids), video_id))
+ return self.playlist_result(entries, video_id, video_title, video_description)
+ else:
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+
+ if view_count is None:
+ view_count = extract_view_count(video_info)
+ if view_count is None and video_details:
+ view_count = int_or_none(video_details.get('viewCount'))
+ if view_count is None and microformat:
+ view_count = int_or_none(microformat.get('viewCount'))
+
+ if is_live is None:
+ is_live = bool_or_none(video_details.get('isLive'))
+
+ has_live_chat_replay = False
+ if not is_live:
+ yt_initial_data = self._get_yt_initial_data(video_id, video_webpage)
+ try:
+ yt_initial_data['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']
+ has_live_chat_replay = True
+ except (KeyError, IndexError, TypeError):
+ pass
+
+ # Check for "rental" videos
+ if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
+ raise ExtractorError('"rental" videos not supported. See https://github.com/ytdl-org/youtube-dl/issues/359 for more information.', expected=True)
+
+ def _extract_filesize(media_url):
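+ # 'clen' may appear as a query parameter (...&clen=123...) or as a
+ # path segment (.../clen/123/...), hence the [=/] in the regex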
+ return int_or_none(self._search_regex(
+ r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
+
+ streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
+ streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
+
+ if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
+ self.report_rtmp_download()
+ formats = [{
+ 'format_id': '_rtmp',
+ 'protocol': 'rtmp',
+ 'url': video_info['conn'][0],
+ 'player_url': player_url,
+ }]
+ elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
+ encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
+ if 'rtmpe%3Dyes' in encoded_url_map:
+ raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
+ formats = []
+ formats_spec = {}
+ fmt_list = video_info.get('fmt_list', [''])[0]
+ if fmt_list:
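+ # entries look like '22/1280x720/9/0/115' -- itag, then resolution,
+ # then further fields that are ignored here (illustrative shape)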
+ for fmt in fmt_list.split(','):
+ spec = fmt.split('/')
+ if len(spec) > 1:
+ width_height = spec[1].split('x')
+ if len(width_height) == 2:
+ formats_spec[spec[0]] = {
+ 'resolution': spec[1],
+ 'width': int_or_none(width_height[0]),
+ 'height': int_or_none(width_height[1]),
+ }
+ for fmt in streaming_formats:
+ itag = str_or_none(fmt.get('itag'))
+ if not itag:
+ continue
+ quality = fmt.get('quality')
+ quality_label = fmt.get('qualityLabel') or quality
+ formats_spec[itag] = {
+ 'asr': int_or_none(fmt.get('audioSampleRate')),
+ 'filesize': int_or_none(fmt.get('contentLength')),
+ 'format_note': quality_label,
+ 'fps': int_or_none(fmt.get('fps')),
+ 'height': int_or_none(fmt.get('height')),
+ # bitrate for itag 43 is always 2147483647
+ 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
+ 'width': int_or_none(fmt.get('width')),
+ }
+
+ for fmt in streaming_formats:
+ if fmt.get('drmFamilies') or fmt.get('drm_families'):
+ continue
+ url = url_or_none(fmt.get('url'))
+
+ if not url:
+ cipher = fmt.get('cipher') or fmt.get('signatureCipher')
+ if not cipher:
+ continue
+ url_data = compat_parse_qs(cipher)
+ url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
+ if not url:
+ continue
+ else:
+ cipher = None
+ url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+
+ stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
+ # Unsupported FORMAT_STREAM_TYPE_OTF
+ if stream_type == 3:
+ continue
+
+ format_id = fmt.get('itag') or url_data['itag'][0]
+ if not format_id:
+ continue
+ format_id = compat_str(format_id)
+
+ if cipher:
+ if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
+ ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
+ jsplayer_url_json = self._search_regex(
+ ASSETS_RE,
+ embed_webpage if age_gate else video_webpage,
+ 'JS player URL (1)', default=None)
+ if not jsplayer_url_json and not age_gate:
+ # We need the embed website after all
+ if embed_webpage is None:
+ embed_url = proto + '://www.youtube.com/embed/%s' % video_id
+ embed_webpage = self._download_webpage(
+ embed_url, video_id, 'Downloading embed webpage')
+ jsplayer_url_json = self._search_regex(
+ ASSETS_RE, embed_webpage, 'JS player URL')
+
+ player_url = json.loads(jsplayer_url_json)
+ if player_url is None:
+ player_url_json = self._search_regex(
+ r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
+ video_webpage, 'age gate player URL')
+ player_url = json.loads(player_url_json)
+
+ if 'sig' in url_data:
+ url += '&signature=' + url_data['sig'][0]
+ elif 's' in url_data:
+ encrypted_sig = url_data['s'][0]
+
+ if self._downloader.params.get('verbose'):
+ if player_url is None:
+ player_desc = 'unknown'
+ else:
+ player_type, player_version = self._extract_player_info(player_url)
+ player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
+ parts_sizes = self._signature_cache_id(encrypted_sig)
+ self.to_screen('{%s} signature length %s, %s' %
+ (format_id, parts_sizes, player_desc))
+
+ signature = self._decrypt_signature(
+ encrypted_sig, video_id, player_url, age_gate)
+ sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
+ url += '&%s=%s' % (sp, signature)
+ if 'ratebypass' not in url:
+ url += '&ratebypass=yes'
+
+ dct = {
+ 'format_id': format_id,
+ 'url': url,
+ 'player_url': player_url,
+ }
+ if format_id in self._formats:
+ dct.update(self._formats[format_id])
+ if format_id in formats_spec:
+ dct.update(formats_spec[format_id])
+
+ # Some itags are not included in the DASH manifest, so the corresponding
+ # formats lack metadata (see https://github.com/ytdl-org/youtube-dl/pull/5993).
+ # Try to extract the metadata from the url_encoded_fmt_stream_map entry.
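+ # e.g. size='1280x720' (illustrative)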
+ mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
+ width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
+
+ if width is None:
+ width = int_or_none(fmt.get('width'))
+ if height is None:
+ height = int_or_none(fmt.get('height'))
+
+ filesize = int_or_none(url_data.get(
+ 'clen', [None])[0]) or _extract_filesize(url)
+
+ quality = url_data.get('quality', [None])[0] or fmt.get('quality')
+ quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
+
+ tbr = (float_or_none(url_data.get('bitrate', [None])[0], 1000)
+ or float_or_none(fmt.get('bitrate'), 1000)) if format_id != '43' else None
+ fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
+
+ more_fields = {
+ 'filesize': filesize,
+ 'tbr': tbr,
+ 'width': width,
+ 'height': height,
+ 'fps': fps,
+ 'format_note': quality_label or quality,
+ }
+ for key, value in more_fields.items():
+ if value:
+ dct[key] = value
+ type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
+ if type_:
+ type_split = type_.split(';')
+ kind_ext = type_split[0].split('/')
+ if len(kind_ext) == 2:
+ kind, _ = kind_ext
+ dct['ext'] = mimetype2ext(type_split[0])
+ if kind in ('audio', 'video'):
+ codecs = None
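+ # Scan the mimeType parameter list (e.g. codecs="avc1.4d401e, mp4a.40.2")
+ # for the codecs attribute.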
+ for mobj in re.finditer(
+ r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
+ if mobj.group('key') == 'codecs':
+ codecs = mobj.group('val')
+ break
+ if codecs:
+ dct.update(parse_codecs(codecs))
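+ # Audio-only and video-only (DASH-style) streams seem to be throttled
+ # on large requests, so fetch them in bounded chunks.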
+ if dct.get('acodec') == 'none' or dct.get('vcodec') == 'none':
+ dct['downloader_options'] = {
+ # Youtube throttles chunks >~10M
+ 'http_chunk_size': 10485760,
+ }
+ formats.append(dct)
+ else:
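+ # No regular formats were extracted (typically a live stream);
+ # fall back to the HLS manifest if one is advertised.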
+ manifest_url = (
+ url_or_none(try_get(
+ player_response,
+ lambda x: x['streamingData']['hlsManifestUrl'],
+ compat_str))
+ or url_or_none(try_get(
+ video_info, lambda x: x['hlsvp'][0], compat_str)))
+ if manifest_url:
+ formats = []
+ m3u8_formats = self._extract_m3u8_formats(
+ manifest_url, video_id, 'mp4', fatal=False)
+ for a_format in m3u8_formats:
+ itag = self._search_regex(
+ r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
+ if itag:
+ a_format['format_id'] = itag
+ if itag in self._formats:
+ dct = self._formats[itag].copy()
+ dct.update(a_format)
+ a_format = dct
+ a_format['player_url'] = player_url
+ # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
+ a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
+ formats.append(a_format)
+ else:
+ error_message = extract_unavailable_message()
+ if not error_message:
+ error_message = clean_html(try_get(
+ player_response, lambda x: x['playabilityStatus']['reason'],
+ compat_str))
+ if not error_message:
+ error_message = clean_html(
+ try_get(video_info, lambda x: x['reason'][0], compat_str))
+ if error_message:
+ raise ExtractorError(error_message, expected=True)
+ raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
+
+ # uploader
+ video_uploader = try_get(
+ video_info, lambda x: x['author'][0],
+ compat_str) or str_or_none(video_details.get('author'))
+ if video_uploader:
+ video_uploader = compat_urllib_parse_unquote_plus(video_uploader)
+ else:
+ self._downloader.report_warning('unable to extract uploader name')
+
+ # uploader_id
+ video_uploader_id = None
+ video_uploader_url = None
+ mobj = re.search(
+ r'<link itemprop="url" href="(?P<uploader_url>https?://www\.youtube\.com/(?:user|channel)/(?P<uploader_id>[^"]+))">',
+ video_webpage)
+ if mobj is not None:
+ video_uploader_id = mobj.group('uploader_id')
+ video_uploader_url = mobj.group('uploader_url')
+ else:
+ owner_profile_url = url_or_none(microformat.get('ownerProfileUrl'))
+ if owner_profile_url:
+ video_uploader_id = self._search_regex(
+ r'(?:user|channel)/([^/]+)', owner_profile_url, 'uploader id',
+ default=None)
+ video_uploader_url = owner_profile_url
+
+ channel_id = (
+ str_or_none(video_details.get('channelId'))
+ or self._html_search_meta(
+ 'channelId', video_webpage, 'channel id', default=None)
+ or self._search_regex(
+ r'data-channel-external-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
+ video_webpage, 'channel id', default=None, group='id'))
+ channel_url = 'http://www.youtube.com/channel/%s' % channel_id if channel_id else None
+
+ thumbnails = []
+ thumbnails_list = try_get(
+ video_details, lambda x: x['thumbnail']['thumbnails'], list) or []
+ for t in thumbnails_list:
+ if not isinstance(t, dict):
+ continue
+ thumbnail_url = url_or_none(t.get('url'))
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(t.get('width')),
+ 'height': int_or_none(t.get('height')),
+ })
+
+ if not thumbnails:
+ video_thumbnail = None
+ # We first try to get a high quality image:
+ m_thumb = re.search(r'<span itemprop="thumbnail".*?href="(.*?)">',
+ video_webpage, re.DOTALL)
+ if m_thumb is not None:
+ video_thumbnail = m_thumb.group(1)
+ thumbnail_url = try_get(video_info, lambda x: x['thumbnail_url'][0], compat_str)
+ if thumbnail_url:
+ video_thumbnail = compat_urllib_parse_unquote_plus(thumbnail_url)
+ if video_thumbnail:
+ thumbnails.append({'url': video_thumbnail})
+
+ # upload date
+ upload_date = self._html_search_meta(
+ 'datePublished', video_webpage, 'upload date', default=None)
+ if not upload_date:
+ upload_date = self._search_regex(
+ [r'(?s)id="eow-date.*?>(.*?)</span>',
+ r'(?:id="watch-uploader-info".*?>.*?|["\']simpleText["\']\s*:\s*["\'])(?:Published|Uploaded|Streamed live|Started) on (.+?)[<"\']'],
+ video_webpage, 'upload date', default=None)
+ if not upload_date:
+ upload_date = microformat.get('publishDate') or microformat.get('uploadDate')
+ upload_date = unified_strdate(upload_date)
+
+ video_license = self._html_search_regex(
+ r'<h4[^>]+class="title"[^>]*>\s*License\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li',
+ video_webpage, 'license', default=None)
+
+ m_music = re.search(
+ r'''(?x)
+ <h4[^>]+class="title"[^>]*>\s*Music\s*</h4>\s*
+ <ul[^>]*>\s*
+ <li>(?P<title>.+?)
+ by (?P<creator>.+?)
+ (?:
+ \(.+?\)|
+ <a[^>]*
+ (?:
+ \bhref=["\']/red[^>]*>| # drop possible
+ >\s*Listen ad-free with YouTube Red # YouTube Red ad
+ )
+ .*?
+ )?</li
+ ''',
+ video_webpage)
+ if m_music:
+ video_alt_title = remove_quotes(unescapeHTML(m_music.group('title')))
+ video_creator = clean_html(m_music.group('creator'))
+ else:
+ video_alt_title = video_creator = None
+
+ def extract_meta(field):
+ return self._html_search_regex(
+ r'<h4[^>]+class="title"[^>]*>\s*%s\s*</h4>\s*<ul[^>]*>\s*<li>(.+?)</li>\s*' % field,
+ video_webpage, field, default=None)
+
+ track = extract_meta('Song')
+ artist = extract_meta('Artist')
+ album = extract_meta('Album')
+
+ # Youtube Music Auto-generated description
+ release_date = release_year = None
+ if video_description:
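+ # Auto-generated descriptions follow a fixed layout: "Provided to
+ # YouTube by <label>", "<track> · <artist>", "<album>", optionally
+ # followed by "℗ <year>" and "Released on: <date>".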
+ mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description)
+ if mobj:
+ if not track:
+ track = mobj.group('track').strip()
+ if not artist:
+ artist = mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·'))
+ if not album:
+ album = mobj.group('album').strip()
+ release_year = mobj.group('release_year')
+ release_date = mobj.group('release_date')
+ if release_date:
+ release_date = release_date.replace('-', '')
+ if not release_year:
+ release_year = int(release_date[:4])
+ if release_year:
+ release_year = int(release_year)
+
+ m_episode = re.search(
+ r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',
+ video_webpage)
+ if m_episode:
+ series = unescapeHTML(m_episode.group('series'))
+ season_number = int(m_episode.group('season'))
+ episode_number = int(m_episode.group('episode'))
+ else:
+ series = season_number = episode_number = None
+
+ m_cat_container = self._search_regex(
+ r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
+ video_webpage, 'categories', default=None)
+ category = None
+ if m_cat_container:
+ category = self._html_search_regex(
+ r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
+ default=None)
+ if not category:
+ category = try_get(
+ microformat, lambda x: x['category'], compat_str)
+ video_categories = None if category is None else [category]
+
+ video_tags = [
+ unescapeHTML(m.group('content'))
+ for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
+ if not video_tags:
+ video_tags = try_get(video_details, lambda x: x['keywords'], list)
+
+ def _extract_count(count_name):
+ return str_to_int(self._search_regex(
+ r'-%s-button[^>]+><span[^>]+class="yt-uix-button-content"[^>]*>([\d,]+)</span>'
+ % re.escape(count_name),
+ video_webpage, count_name, default=None))
+
+ like_count = _extract_count('like')
+ dislike_count = _extract_count('dislike')
+
+ if view_count is None:
+ view_count = str_to_int(self._search_regex(
+ r'<[^>]+class=["\']watch-view-count[^>]+>\s*([\d,\s]+)', video_webpage,
+ 'view count', default=None))
+
+ average_rating = (
+ float_or_none(video_details.get('averageRating'))
+ or try_get(video_info, lambda x: float_or_none(x['avg_rating'][0])))
+
+ # subtitles
+ video_subtitles = self.extract_subtitles(
+ video_id, video_webpage, has_live_chat_replay)
+ automatic_captions = self.extract_automatic_captions(video_id, video_webpage)
+
+ video_duration = try_get(
+ video_info, lambda x: int_or_none(x['length_seconds'][0]))
+ if not video_duration:
+ video_duration = int_or_none(video_details.get('lengthSeconds'))
+ if not video_duration:
+ video_duration = parse_duration(self._html_search_meta(
+ 'duration', video_webpage, 'video duration'))
+
+ # annotations
+ video_annotations = None
+ if self._downloader.params.get('writeannotations', False):
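+ # Downloading annotations requires POSTing the page's XSRF token
+ # to the invideo annotations URL.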
+ xsrf_token = self._search_regex(
+ r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
+ video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+ invideo_url = try_get(
+ player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
+ if xsrf_token and invideo_url:
+ xsrf_field_name = self._search_regex(
+ r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+ video_webpage, 'xsrf field name',
+ group='xsrf_field_name', default='session_token')
+ video_annotations = self._download_webpage(
+ self._proto_relative_url(invideo_url),
+ video_id, note='Downloading annotations',
+ errnote='Unable to download video annotations', fatal=False,
+ data=urlencode_postdata({xsrf_field_name: xsrf_token}))
+
+ chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
+
+ # Look for the DASH manifest
+ if self._downloader.params.get('youtube_include_dash_manifest', True):
+ dash_mpd_fatal = True
+ for mpd_url in dash_mpds:
+ dash_formats = {}
+ try:
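+ # MPD URLs may embed an encrypted signature in their path (/s/<sig>);
+ # rewrite it to /signature/<decrypted signature> before fetching.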
+ def decrypt_sig(mobj):
+ s = mobj.group(1)
+ dec_s = self._decrypt_signature(s, video_id, player_url, age_gate)
+ return '/signature/%s' % dec_s
+
+ mpd_url = re.sub(r'/s/([a-fA-F0-9\.]+)', decrypt_sig, mpd_url)
+
+ for df in self._extract_mpd_formats(
+ mpd_url, video_id, fatal=dash_mpd_fatal,
+ formats_dict=self._formats):
+ if not df.get('filesize'):
+ df['filesize'] = _extract_filesize(df['url'])
+ # Do not overwrite a DASH format found in a previous DASH manifest
+ if df['format_id'] not in dash_formats:
+ dash_formats[df['format_id']] = df
+ # Additional DASH manifests may result in HTTP Error 403, so allow
+ # them to fail without a bug report message if some DASH manifest
+ # has already succeeded. This is a temporary workaround to reduce the
+ # burst of bug reports until we figure out the reason and whether it
+ # can be fixed at all.
+ dash_mpd_fatal = False
+ except (ExtractorError, KeyError) as e:
+ self.report_warning(
+ 'Skipping DASH manifest: %r' % e, video_id)
+ if dash_formats:
+ # Remove the formats we found through non-DASH means: they
+ # carry less info, which can also be wrong because we use
+ # fixed values (for example the resolution). See
+ # https://github.com/ytdl-org/youtube-dl/issues/5774 for an
+ # example.
+ formats = [f for f in formats if f['format_id'] not in dash_formats.keys()]
+ formats.extend(dash_formats.values())
+
+ # Check for malformed aspect ratio
+ stretched_m = re.search(
+ r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">',
+ video_webpage)
+ if stretched_m:
+ w = float(stretched_m.group('w'))
+ h = float(stretched_m.group('h'))
+ # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0).
+ # We will only process correct ratios.
+ if w > 0 and h > 0:
+ ratio = w / h
+ for f in formats:
+ if f.get('vcodec') != 'none':
+ f['stretched_ratio'] = ratio
+
+ if not formats:
+ if 'reason' in video_info:
+ if 'The uploader has not made this video available in your country.' in video_info['reason']:
+ regions_allowed = self._html_search_meta(
+ 'regionsAllowed', video_webpage, default=None)
+ countries = regions_allowed.split(',') if regions_allowed else None
+ self.raise_geo_restricted(
+ msg=video_info['reason'][0], countries=countries)
+ reason = video_info['reason'][0]
+ if 'Invalid parameters' in reason:
+ unavailable_message = extract_unavailable_message()
+ if unavailable_message:
+ reason = unavailable_message
+ raise ExtractorError(
+ 'YouTube said: %s' % reason,
+ expected=True, video_id=video_id)
+ if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
+ raise ExtractorError('This video is DRM protected.', expected=True)
+
+ self._sort_formats(formats)
+
+ self.mark_watched(video_id, video_info, player_response)
+
+ return {
+ 'id': video_id,
+ 'uploader': video_uploader,
+ 'uploader_id': video_uploader_id,
+ 'uploader_url': video_uploader_url,
+ 'channel_id': channel_id,
+ 'channel_url': channel_url,
+ 'upload_date': upload_date,
+ 'license': video_license,
+ 'creator': video_creator or artist,
+ 'title': video_title,
+ 'alt_title': video_alt_title or track,
+ 'thumbnails': thumbnails,
+ 'description': video_description,
+ 'categories': video_categories,
+ 'tags': video_tags,
+ 'subtitles': video_subtitles,
+ 'automatic_captions': automatic_captions,
+ 'duration': video_duration,
+ 'age_limit': 18 if age_gate else 0,
+ 'annotations': video_annotations,
+ 'chapters': chapters,
+ 'webpage_url': proto + '://www.youtube.com/watch?v=%s' % video_id,
+ 'view_count': view_count,
+ 'like_count': like_count,
+ 'dislike_count': dislike_count,
+ 'average_rating': average_rating,
+ 'formats': formats,
+ 'is_live': is_live,
+ 'start_time': start_time,
+ 'end_time': end_time,
+ 'series': series,
+ 'season_number': season_number,
+ 'episode_number': episode_number,
+ 'track': track,
+ 'artist': artist,
+ 'album': album,
+ 'release_date': release_date,
+ 'release_year': release_year,
+ }
+
+
+class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
+ IE_DESC = 'YouTube.com playlists'
+ _VALID_URL = r"""(?x)(?:
+ (?:https?://)?
+ (?:\w+\.)?
+ (?:
+ (?:
+ youtube(?:kids)?\.com|
+ invidio\.us
+ )
+ /
+ (?:
+ (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
+ \? (?:.*?[&;])*? (?:p|a|list)=
+ | p/
+ )|
+ youtu\.be/[0-9A-Za-z_-]{11}\?.*?\blist=
+ )
+ (
+ (?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)?[0-9A-Za-z-_]{10,}
+ # Top tracks; their ids can also include dots
+ |(?:MC)[\w\.]*
+ )
+ .*
+ |
+ (%(playlist_id)s)
+ )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
+ _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
+ _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&amp;(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
+ _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
+ IE_NAME = 'youtube:playlist'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'info_dict': {
+ 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'uploader': 'Sergey M.',
+ 'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+ 'title': 'youtube-dl public playlist',
+ },
+ 'playlist_count': 1,
+ }, {
+ 'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+ 'info_dict': {
+ 'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+ 'uploader': 'Sergey M.',
+ 'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+ 'title': 'youtube-dl empty playlist',
+ },
+ 'playlist_count': 0,
+ }, {
+ 'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
+ 'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ 'info_dict': {
+ 'title': '29C3: Not my department',
+ 'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
+ 'uploader': 'Christiaan008',
+ 'uploader_id': 'ChRiStIaAn008',
+ },
+ 'playlist_count': 96,
+ }, {
+ 'note': 'issue #673',
+ 'url': 'PLBB231211A4F62143',
+ 'info_dict': {
+ 'title': '[OLD]Team Fortress 2 (Class-based LP)',
+ 'id': 'PLBB231211A4F62143',
+ 'uploader': 'Wickydoo',
+ 'uploader_id': 'Wickydoo',
+ },
+ 'playlist_mincount': 26,
+ }, {
+ 'note': 'Large playlist',
+ 'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'info_dict': {
+ 'title': 'Uploads from Cauchemar',
+ 'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',
+ 'uploader': 'Cauchemar',
+ 'uploader_id': 'Cauchemar89',
+ },
+ 'playlist_mincount': 799,
+ }, {
+ 'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ 'info_dict': {
+ 'title': 'YDL_safe_search',
+ 'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',
+ },
+ 'playlist_count': 2,
+ 'skip': 'This playlist is private',
+ }, {
+ 'note': 'embedded',
+ 'url': 'https://www.youtube.com/embed/videoseries?list=PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'title': 'JODA15',
+ 'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
+ 'uploader': 'milan',
+ 'uploader_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
+ }
+ }, {
+ 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'playlist_mincount': 485,
+ 'info_dict': {
+ 'title': '2018 Chinese New Singles (11/6 updated)',
+ 'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+ 'uploader': 'LBK',
+ 'uploader_id': 'sdragonfang',
+ }
+ }, {
+ 'note': 'Embedded SWF player',
+ 'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
+ 'playlist_count': 4,
+ 'info_dict': {
+ 'title': 'JODA7',
+ 'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',
+ },
+ 'skip': 'This playlist does not exist',
+ }, {
+ 'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',
+ 'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',
+ 'info_dict': {
+ 'title': 'Uploads from Interstellar Movie',
+ 'id': 'UUXw-G3eDE9trcvY2sBMM_aA',
+ 'uploader': 'Interstellar Movie',
+ 'uploader_id': 'InterstellarMovie1',
+ },
+ 'playlist_mincount': 21,
+ }, {
+ # Playlist URL that does not actually serve a playlist
+ 'url': 'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4',
+ 'info_dict': {
+ 'id': 'FqZTN594JQw',
+ 'ext': 'webm',
+ 'title': "Smiley's People 01 detective, Adventure Series, Action",
+ 'uploader': 'STREEM',
+ 'uploader_id': 'UCyPhqAZgwYWZfxElWVbVJng',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCyPhqAZgwYWZfxElWVbVJng',
+ 'upload_date': '20150526',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:507cdcb5a49ac0da37a920ece610be80',
+ 'categories': ['People & Blogs'],
+ 'tags': list,
+ 'view_count': int,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ 'add_ie': [YoutubeIE.ie_key()],
+ }, {
+ 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5',
+ 'info_dict': {
+ 'id': 'yeWKywCrFtk',
+ 'ext': 'mp4',
+ 'title': 'Small Scale Baler and Braiding Rugs',
+ 'uploader': 'Backus-Page House Museum',
+ 'uploader_id': 'backuspagemuseum',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/backuspagemuseum',
+ 'upload_date': '20161008',
+ 'description': 'md5:800c0c78d5eb128500bffd4f0b4f2e8a',
+ 'categories': ['Nonprofits & Activism'],
+ 'tags': list,
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'noplaylist': True,
+ 'skip_download': True,
+ },
+ }, {
+ # https://github.com/ytdl-org/youtube-dl/issues/21844
+ 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'info_dict': {
+ 'title': 'Data Analysis with Dr Mike Pound',
+ 'id': 'PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
+ 'uploader_id': 'Computerphile',
+ 'uploader': 'Computerphile',
+ },
+ 'playlist_mincount': 11,
+ }, {
+ 'url': 'https://youtu.be/uWyaPkt-VOI?list=PL9D9FC436B881BA21',
+ 'only_matching': True,
+ }, {
+ 'url': 'TLGGrESM50VT6acwMjAyMjAxNw',
+ 'only_matching': True,
+ }, {
+ # music album playlist
+ 'url': 'OLAK5uy_m4xAFdmMC5rX3Ji3g93pQe3hqLZw_9LhM',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://invidio.us/playlist?list=PLDIoUOhQQPlXr63I_vwF9GD8sAKh77dWU',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtubekids.com/watch?v=Agk7R8I8o5U&list=PUZ6jURNr1WQZCNHF0ao-c0g',
+ 'only_matching': True,
+ }]
+
+ def _real_initialize(self):
+ self._login()
+
+ def extract_videos_from_page(self, page):
+ ids_in_page = []
+ titles_in_page = []
+
+ for item in re.findall(
+ r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
+ attrs = extract_attributes(item)
+ video_id = attrs['data-video-id']
+ video_title = unescapeHTML(attrs.get('data-title'))
+ if video_title:
+ video_title = video_title.strip()
+ ids_in_page.append(video_id)
+ titles_in_page.append(video_title)
+
+ # Fallback with old _VIDEO_RE
+ self.extract_videos_from_page_impl(
+ self._VIDEO_RE, page, ids_in_page, titles_in_page)
+
+ # Relaxed fallbacks
+ self.extract_videos_from_page_impl(
+ r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
+ ids_in_page, titles_in_page)
+ self.extract_videos_from_page_impl(
+ r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
+ ids_in_page, titles_in_page)
+
+ return zip(ids_in_page, titles_in_page)
+
+ def _extract_mix(self, playlist_id):
+ # Mixes are generated from a single video;
+ # the id of the playlist is just 'RD' + video_id
+ ids = []
+ last_id = playlist_id[-11:]
+ for n in itertools.count(1):
+ url = 'https://www.youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+ webpage = self._download_webpage(
+ url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
+ new_ids = orderedSet(re.findall(
+ r'''(?xs)data-video-username=".*?".*?
+ href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+ webpage))
+ # Fetch new pages until all the videos repeat; it seems that
+ # there are always 51 unique videos.
+ new_ids = [_id for _id in new_ids if _id not in ids]
+ if not new_ids:
+ break
+ ids.extend(new_ids)
+ last_id = ids[-1]
+
+ url_results = self._ids_to_results(ids)
+
+ search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+ title_span = (
+ search_title('playlist-title')
+ or search_title('title long-title')
+ or search_title('title'))
+ title = clean_html(title_span)
+
+ return self.playlist_result(url_results, playlist_id, title)
+
+ def _extract_playlist(self, playlist_id):
+ url = self._TEMPLATE_URL % playlist_id
+ page = self._download_webpage(url, playlist_id)
+
+ # the yt-alert-message now has a tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604)
+ for match in re.findall(r'<div class="yt-alert-message"[^>]*>([^<]+)</div>', page):
+ match = match.strip()
+ # Check if the playlist exists or is private
+ mobj = re.match(r'[^<]*(?:The|This) playlist (?P<reason>does not exist|is private)[^<]*', match)
+ if mobj:
+ reason = mobj.group('reason')
+ message = 'This playlist %s' % reason
+ if 'private' in reason:
+ message += ', use --username or --netrc to access it'
+ message += '.'
+ raise ExtractorError(message, expected=True)
+ elif re.match(r'[^<]*Invalid parameters[^<]*', match):
+ raise ExtractorError(
+ 'Invalid parameters. Maybe URL is incorrect.',
+ expected=True)
+ elif re.match(r'[^<]*Choose your language[^<]*', match):
+ continue
+ else:
+ self.report_warning('Youtube gives an alert message: ' + match)
+
+ playlist_title = self._html_search_regex(
+ r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>',
+ page, 'title', default=None)
+
+ _UPLOADER_BASE = r'class=["\']pl-header-details[^>]+>\s*<li>\s*<a[^>]+\bhref='
+ uploader = self._html_search_regex(
+ r'%s["\']/(?:user|channel)/[^>]+>([^<]+)' % _UPLOADER_BASE,
+ page, 'uploader', default=None)
+ mobj = re.search(
+ r'%s(["\'])(?P<path>/(?:user|channel)/(?P<uploader_id>.+?))\1' % _UPLOADER_BASE,
+ page)
+ if mobj:
+ uploader_id = mobj.group('uploader_id')
+ uploader_url = compat_urlparse.urljoin(url, mobj.group('path'))
+ else:
+ uploader_id = uploader_url = None
+
+ has_videos = True
+
+ if not playlist_title:
+ try:
+ # Some playlist URLs don't actually serve a playlist (e.g.
+ # https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4)
+ next(self._entries(page, playlist_id))
+ except StopIteration:
+ has_videos = False
+
+ playlist = self.playlist_result(
+ self._entries(page, playlist_id), playlist_id, playlist_title)
+ playlist.update({
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'uploader_url': uploader_url,
+ })
+
+ return has_videos, playlist
+
+ def _check_download_just_video(self, url, playlist_id):
+ # Check if it's a video-specific URL
+ query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
+ video_id = query_dict.get('v', [None])[0] or self._search_regex(
+ r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
+ 'video id', default=None)
+ if video_id:
+ if self._downloader.params.get('noplaylist'):
+ self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
+ return video_id, self.url_result(video_id, 'Youtube', video_id=video_id)
+ else:
+ self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+ return video_id, None
+ return None, None
+
+ def _real_extract(self, url):
+ # Extract playlist id
+ mobj = re.match(self._VALID_URL, url)
+ if mobj is None:
+ raise ExtractorError('Invalid URL: %s' % url)
+ playlist_id = mobj.group(1) or mobj.group(2)
+
+ video_id, video = self._check_download_just_video(url, playlist_id)
+ if video:
+ return video
+
+ if playlist_id.startswith(('RD', 'UL', 'PU')):
+ # Mixes require a custom extraction process
+ return self._extract_mix(playlist_id)
+
+ has_videos, playlist = self._extract_playlist(playlist_id)
+ if has_videos or not video_id:
+ return playlist
+
+ # Some playlist URLs don't actually serve a playlist (see
+ # https://github.com/ytdl-org/youtube-dl/issues/10537).
+ # Fallback to plain video extraction if there is a video id
+ # along with playlist id.
+ return self.url_result(video_id, 'Youtube', video_id=video_id)
+
+
+class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor):
+ IE_DESC = 'YouTube.com channels'
+ _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie|kids)?\.com|(?:www\.)?invidio\.us)/channel/(?P<id>[0-9A-Za-z_-]+)'
+ _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos'
+ _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?'
+ IE_NAME = 'youtube:channel'
+ _TESTS = [{
+ 'note': 'paginated channel',
+ 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',
+ 'playlist_mincount': 91,
+ 'info_dict': {
+ 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w',
+ 'title': 'Uploads from lex will',
+ 'uploader': 'lex will',
+ 'uploader_id': 'UCKfVa3S1e4PHvxWcwyMMg8w',
+ }
+ }, {
+ 'note': 'Age restricted channel',
+ # from https://www.youtube.com/user/DeusExOfficial
+ 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w',
+ 'playlist_mincount': 64,
+ 'info_dict': {
+ 'id': 'UUs0ifCMCm1icqRbqhUINa0w',
+ 'title': 'Uploads from Deus Ex',
+ 'uploader': 'Deus Ex',
+ 'uploader_id': 'DeusExOfficial',
+ },
+ }, {
+ 'url': 'https://invidio.us/channel/UC23qupoDRn9YOAVzeoxjOQA',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtubekids.com/channel/UCyu8StPfZWapR6rfW_JgqcA',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return (False if YoutubePlaylistsIE.suitable(url) or YoutubeLiveIE.suitable(url)
+ else super(YoutubeChannelIE, cls).suitable(url))
+
+ def _build_template_url(self, url, channel_id):
+ return self._TEMPLATE_URL % channel_id
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ url = self._build_template_url(url, channel_id)
+
+ # Channel-by-page listing is restricted to 35 pages of 30 items, i.e. 1050 videos total (see #5778).
+ # Work around this by extracting as a playlist if we managed to obtain the channel playlist URL;
+ # otherwise fall back on channel-by-page extraction.
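+ # The view=57 variant of the channel page appears to expose the
+ # channel id metadata used below.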
+ channel_page = self._download_webpage(
+ url + '?view=57', channel_id,
+ 'Downloading channel page', fatal=False)
+ if channel_page is False:
+ channel_playlist_id = False
+ else:
+ channel_playlist_id = self._html_search_meta(
+ 'channelId', channel_page, 'channel id', default=None)
+ if not channel_playlist_id:
+ channel_url = self._html_search_meta(
+ ('al:ios:url', 'twitter:app:url:iphone', 'twitter:app:url:ipad'),
+ channel_page, 'channel url', default=None)
+ if channel_url:
+ channel_playlist_id = self._search_regex(
+ r'vnd\.youtube://user/([0-9A-Za-z_-]+)',
+ channel_url, 'channel id', default=None)
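+ # A channel id of the form UC<base> maps to its uploads playlist
+ # UU<base>.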
+ if channel_playlist_id and channel_playlist_id.startswith('UC'):
+ playlist_id = 'UU' + channel_playlist_id[2:]
+ return self.url_result(
+ compat_urlparse.urljoin(url, '/playlist?list=%s' % playlist_id), 'YoutubePlaylist')
+
+ channel_page = self._download_webpage(url, channel_id, 'Downloading page #1')
+ autogenerated = re.search(r'''(?x)
+ class="[^"]*?(?:
+ channel-header-autogenerated-label|
+ yt-channel-title-autogenerated
+ )[^"]*"''', channel_page) is not None
+
+ if autogenerated:
+ # The videos are contained in a single page;
+ # the ajax pages can't be used since they are empty
+ entries = [
+ self.url_result(
+ video_id, 'Youtube', video_id=video_id,
+ video_title=video_title)
+ for video_id, video_title in self.extract_videos_from_page(channel_page)]
+ return self.playlist_result(entries, channel_id)
+
+ try:
+ next(self._entries(channel_page, channel_id))
+ except StopIteration:
+ alert_message = self._html_search_regex(
+ r'(?s)<div[^>]+class=(["\']).*?\byt-alert-message\b.*?\1[^>]*>(?P<alert>[^<]+)</div>',
+ channel_page, 'alert', default=None, group='alert')
+ if alert_message:
+ raise ExtractorError('Youtube said: %s' % alert_message, expected=True)
+
+ return self.playlist_result(self._entries(channel_page, channel_id), channel_id)
+
+
+class YoutubeUserIE(YoutubeChannelIE):
+ IE_DESC = 'YouTube.com user videos (URL or "ytuser" keyword)'
+ _VALID_URL = r'(?:(?:https?://(?:\w+\.)?youtube\.com/(?:(?P<user>user|c)/)?(?!(?:attribution_link|watch|results|shared)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)(?P<id>[A-Za-z0-9_-]+)'
+ _TEMPLATE_URL = 'https://www.youtube.com/%s/%s/videos'
+ IE_NAME = 'youtube:user'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/user/TheLinuxFoundation',
+ 'playlist_mincount': 320,
+ 'info_dict': {
+ 'id': 'UUfX55Sx5hEFjoC3cNs6mCUQ',
+ 'title': 'Uploads from The Linux Foundation',
+ 'uploader': 'The Linux Foundation',
+ 'uploader_id': 'TheLinuxFoundation',
+ }
+ }, {
+ # Only available via https://www.youtube.com/c/12minuteathlete/videos
+ # but not https://www.youtube.com/user/12minuteathlete/videos
+ 'url': 'https://www.youtube.com/c/12minuteathlete/videos',
+ 'playlist_mincount': 249,
+ 'info_dict': {
+ 'id': 'UUVjM-zV6_opMDx7WYxnjZiQ',
+ 'title': 'Uploads from 12 Minute Athlete',
+ 'uploader': '12 Minute Athlete',
+ 'uploader_id': 'the12minuteathlete',
+ }
+ }, {
+ 'url': 'ytuser:phihag',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/c/gametrailers',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/gametrailers',
+ 'only_matching': True,
+ }, {
+ # This channel is not available, geo restricted to JP
+ 'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ # Don't return True if the url can be extracted with another youtube
+ # extractor; the regex is too permissive and it would match otherwise.
+ other_yt_ies = iter(klass for (name, klass) in globals().items() if name.startswith('Youtube') and name.endswith('IE') and klass is not cls)
+ if any(ie.suitable(url) for ie in other_yt_ies):
+ return False
+ else:
+ return super(YoutubeUserIE, cls).suitable(url)
+
+ def _build_template_url(self, url, channel_id):
+ mobj = re.match(self._VALID_URL, url)
+ return self._TEMPLATE_URL % (mobj.group('user') or 'user', mobj.group('id'))
+
+
+class YoutubeLiveIE(YoutubeBaseInfoExtractor):
+ IE_DESC = 'YouTube.com live streams'
+ _VALID_URL = r'(?P<base_url>https?://(?:\w+\.)?youtube\.com/(?:(?:user|channel|c)/)?(?P<id>[^/]+))/live'
+ IE_NAME = 'youtube:live'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/user/TheYoungTurks/live',
+ 'info_dict': {
+ 'id': 'a48o2S1cPoo',
+ 'ext': 'mp4',
+ 'title': 'The Young Turks - Live Main Show',
+ 'uploader': 'The Young Turks',
+ 'uploader_id': 'TheYoungTurks',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheYoungTurks',
+ 'upload_date': '20150715',
+ 'license': 'Standard YouTube License',
+ 'description': 'md5:438179573adcdff3c97ebb1ee632b891',
+ 'categories': ['News & Politics'],
+ 'tags': ['Cenk Uygur (TV Program Creator)', 'The Young Turks (Award-Winning Work)', 'Talk Show (TV Genre)'],
+ 'like_count': int,
+ 'dislike_count': int,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://www.youtube.com/channel/UC1yBKRuGpC1tSM73A0ZjYjQ/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/c/CommanderVideoHq/live',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/TheYoungTurks/live',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ channel_id = mobj.group('id')
+ base_url = mobj.group('base_url')
+ webpage = self._download_webpage(url, channel_id, fatal=False)
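+ # If the /live page resolves to a concrete video, hand it over to
+ # YoutubeIE; otherwise fall back to the base channel/user URL.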
+ if webpage:
+ page_type = self._og_search_property(
+ 'type', webpage, 'page type', default='')
+ video_id = self._html_search_meta(
+ 'videoId', webpage, 'video id', default=None)
+ if page_type.startswith('video') and video_id and re.match(
+ r'^[0-9A-Za-z_-]{11}$', video_id):
+ return self.url_result(video_id, YoutubeIE.ie_key())
+ return self.url_result(base_url)
+
+
+class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
+ IE_DESC = 'YouTube.com user/channel playlists'
+ _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/(?:user|channel|c)/(?P<id>[^/]+)/playlists'
+ IE_NAME = 'youtube:playlists'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/user/ThirstForScience/playlists',
+ 'playlist_mincount': 4,
+ 'info_dict': {
+ 'id': 'ThirstForScience',
+ 'title': 'ThirstForScience',
+ },
+ }, {
+ # with "Load more" button
+ 'url': 'https://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd',
+ 'playlist_mincount': 70,
+ 'info_dict': {
+ 'id': 'igorkle1',
+ 'title': 'Игорь Клейнер',
+ },
+ }, {
+ 'url': 'https://www.youtube.com/channel/UCiU1dHvZObB2iP6xkJ__Icw/playlists',
+ 'playlist_mincount': 17,
+ 'info_dict': {
+ 'id': 'UCiU1dHvZObB2iP6xkJ__Icw',
+ 'title': 'Chem Player',
+ },
+ 'skip': 'Blocked',
+ }, {
+ 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists',
+ 'only_matching': True,
+ }]
+
+
+class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
+ _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
+
+
+class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
+ IE_DESC = 'YouTube.com searches'
+ # there doesn't appear to be a real limit; for example, searching for
+ # 'python' yields more than 8,000,000 results
+ _MAX_RESULTS = float('inf')
+ IE_NAME = 'youtube:search'
+ _SEARCH_KEY = 'ytsearch'
+ _EXTRA_QUERY_ARGS = {}
+ _TESTS = []
+
+ def _get_n_results(self, query, n):
+ """Get a specified number of results for a query"""
+
+ videos = []
+ limit = n
+
+ url_query = {
+ 'search_query': query.encode('utf-8'),
+ }
+ url_query.update(self._EXTRA_QUERY_ARGS)
+ result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
+
+ for pagenum in itertools.count(1):
+ data = self._download_json(
+ result_url, video_id='query "%s"' % query,
+ note='Downloading page %s' % pagenum,
+ errnote='Unable to download API page',
+ query={'spf': 'navigate'})
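+ # With spf=navigate the results page is served as a JSON array whose
+ # second element carries the rendered HTML body.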
+ html_content = data[1]['body']['content']
+
+ if 'class="search-message' in html_content:
+ raise ExtractorError(
+ '[youtube] No video results', expected=True)
+
+ new_videos = list(self._process_page(html_content))
+ videos += new_videos
+ if not new_videos or len(videos) > limit:
+ break
+ next_link = self._html_search_regex(
+ r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
+ html_content, 'next link', default=None)
+ if next_link is None:
+ break
+ result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
+
+ if len(videos) > n:
+ videos = videos[:n]
+ return self.playlist_result(videos, query)
+
+
+class YoutubeSearchDateIE(YoutubeSearchIE):
+ IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
+ _SEARCH_KEY = 'ytsearchdate'
+ IE_DESC = 'YouTube.com searches, newest videos first'
+ _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
+
+
+class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
+ IE_DESC = 'YouTube.com search URLs'
+ IE_NAME = 'youtube:search_url'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'title': 'youtube-dl test video',
+ }
+ }, {
+ 'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ query = compat_urllib_parse_unquote_plus(mobj.group('query'))
+ webpage = self._download_webpage(url, query)
+ return self.playlist_result(self._process_page(webpage), playlist_title=query)
+
+
+class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor):
+ IE_DESC = 'YouTube.com (multi-season) shows'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/show/(?P<id>[^?#]*)'
+ IE_NAME = 'youtube:show'
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/show/airdisasters',
+ 'playlist_mincount': 5,
+ 'info_dict': {
+ 'id': 'airdisasters',
+ 'title': 'Air Disasters',
+ }
+ }]
+
+ def _real_extract(self, url):
+ playlist_id = self._match_id(url)
+ return super(YoutubeShowIE, self)._real_extract(
+ 'https://www.youtube.com/show/%s/playlists' % playlist_id)
+
+
+class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
+ """
+ Base class for feed extractors
+ Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
+ """
+ _LOGIN_REQUIRED = True
+
+ @property
+ def IE_NAME(self):
+ return 'youtube:%s' % self._FEED_NAME
+
+ def _real_initialize(self):
+ self._login()
+
+ def _entries(self, page):
+ # The extraction process is the same as for playlists, but the regex
+ # for the video ids doesn't contain an index
+ ids = []
+ more_widget_html = content_html = page
+ for page_num in itertools.count(1):
+ matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
+
+ # The 'recommended' feed has an infinite 'load more' and each new portion serves
+ # the same videos in a (sometimes) slightly different order, so we check
+ # for uniqueness and break when a portion has no new videos
+ new_ids = list(filter(lambda video_id: video_id not in ids, orderedSet(matches)))
+ if not new_ids:
+ break
+
+ ids.extend(new_ids)
+
+ for entry in self._ids_to_results(new_ids):
+ yield entry
+
+ mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+ if not mobj:
+ break
+
+ more = self._download_json(
+ 'https://www.youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
+ 'Downloading page #%s' % page_num,
+ transform_source=uppercase_escape,
+ headers=self._YOUTUBE_CLIENT_HEADERS)
+ content_html = more['content_html']
+ more_widget_html = more['load_more_widget_html']
+
+ def _real_extract(self, url):
+ page = self._download_webpage(
+ 'https://www.youtube.com/feed/%s' % self._FEED_NAME,
+ self._PLAYLIST_TITLE)
+ return self.playlist_result(
+ self._entries(page), playlist_title=self._PLAYLIST_TITLE)
+
+
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+ IE_NAME = 'youtube:watchlater'
+ IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:feed/watch_later|(?:playlist|watch)\?(?:.+&)?list=WL)|:ytwatchlater'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/playlist?list=WL',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?v=bCNU9TrbiRk&index=1&list=WL',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ _, video = self._check_download_just_video(url, 'WL')
+ if video:
+ return video
+ _, playlist = self._extract_playlist('WL')
+ return playlist
+
+
+class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
+ IE_NAME = 'youtube:favorites'
+ IE_DESC = 'YouTube.com favourite videos, ":ytfav" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/my_favorites|:ytfav(?:ou?rites)?'
+ _LOGIN_REQUIRED = True
+
+ def _real_extract(self, url):
+ webpage = self._download_webpage('https://www.youtube.com/my_favorites', 'Youtube Favourites videos')
+ playlist_id = self._search_regex(r'list=(.+?)["&]', webpage, 'favourites playlist id')
+ return self.url_result(playlist_id, 'YoutubePlaylist')
+
+
+class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/recommended|:ytrec(?:ommended)?'
+ _FEED_NAME = 'recommended'
+ _PLAYLIST_TITLE = 'Youtube Recommended videos'
+
+
+class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+ _FEED_NAME = 'subscriptions'
+ _PLAYLIST_TITLE = 'Youtube Subscriptions'
+
+
+class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/feed/history|:ythistory'
+ _FEED_NAME = 'history'
+ _PLAYLIST_TITLE = 'Youtube History'
+
+
+class YoutubeTruncatedURLIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_url'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'''(?x)
+ (?:https?://)?
+ (?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie)?\.com/
+ (?:watch\?(?:
+ feature=[a-z_]+|
+ annotation_id=annotation_[^&]+|
+ x-yt-cl=[0-9]+|
+ hl=[^&]*|
+ t=[0-9]+
+ )?
+ |
+ attribution_link\?a=[^&]+
+ )
+ $
+ '''
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?annotation_id=annotation_3951667041',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?x-yt-cl=84503534',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?feature=foo',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?hl=en-GB',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.youtube.com/watch?t=2372',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ raise ExtractorError(
+ 'Did you forget to quote the URL? Remember that & is a meta '
+ 'character in most shells, so you want to put the URL in quotes, '
+ 'like youtube-dl '
+ '"https://www.youtube.com/watch?feature=foo&v=BaW_jenozKc" '
+ ' or simply youtube-dl BaW_jenozKc .',
+ expected=True)
+
+
+class YoutubeTruncatedIDIE(InfoExtractor):
+ IE_NAME = 'youtube:truncated_id'
+ IE_DESC = False # Do not list
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/watch\?v=(?P<id>[0-9A-Za-z_-]{1,10})$'
+
+ _TESTS = [{
+ 'url': 'https://www.youtube.com/watch?v=N_708QY7Ob',
+ 'only_matching': True,
+ }]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+ raise ExtractorError(
+ 'Incomplete YouTube ID %s. URL %s looks truncated.' % (video_id, url),
+ expected=True)
diff --git a/youtube_dlc/extractor/zapiks.py b/youtube_dlc/extractor/zapiks.py
new file mode 100644
index 000000000..f6496f516
--- /dev/null
+++ b/youtube_dlc/extractor/zapiks.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ xpath_with_ns,
+ xpath_text,
+ int_or_none,
+)
+
+
+class ZapiksIE(InfoExtractor):
+ _VALID_URL = r'https?://(?:www\.)?zapiks\.(?:fr|com)/(?:(?:[a-z]{2}/)?(?P<display_id>.+?)\.html|index\.php\?.*\bmedia_id=(?P<id>\d+))'
+ _TESTS = [
+ {
+ 'url': 'http://www.zapiks.fr/ep2s3-bon-appetit-eh-be-viva.html',
+ 'md5': 'aeb3c473b2d564b2d46d664d28d5f050',
+ 'info_dict': {
+ 'id': '80798',
+ 'ext': 'mp4',
+ 'title': 'EP2S3 - Bon Appétit - Eh bé viva les pyrénées con!',
+ 'description': 'md5:7054d6f6f620c6519be1fe710d4da847',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'duration': 528,
+ 'timestamp': 1359044972,
+ 'upload_date': '20130124',
+ 'view_count': int,
+ },
+ },
+ {
+ 'url': 'http://www.zapiks.com/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.com/nl/ep3s5-bon-appetit-baqueira-m-1.html',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'http://www.zapiks.fr/index.php?action=playerIframe&amp;media_id=118046&amp;width=640&amp;height=360&amp;autoStart=false&amp;language=fr',
+ 'only_matching': True,
+ },
+ ]
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ video_id = mobj.group('id')
+ display_id = mobj.group('display_id') or video_id
+
+ webpage = self._download_webpage(url, display_id)
+
+ if not video_id:
+ video_id = self._search_regex(
+ r'data-media-id="(\d+)"', webpage, 'video id')
+
+ playlist = self._download_xml(
+ 'http://www.zapiks.fr/view/index.php?action=playlist&media_id=%s&lang=en' % video_id,
+ display_id)
+
+ NS_MAP = {
+ 'jwplayer': 'http://rss.jwpcdn.com/'
+ }
+
+ def ns(path):
+ return xpath_with_ns(path, NS_MAP)
+
+ item = playlist.find('./channel/item')
+
+ title = xpath_text(item, 'title', 'title') or self._og_search_title(webpage)
+ description = self._og_search_description(webpage, default=None)
+ thumbnail = xpath_text(
+ item, ns('./jwplayer:image'), 'thumbnail') or self._og_search_thumbnail(webpage, default=None)
+ duration = parse_duration(self._html_search_meta(
+ 'duration', webpage, 'duration', default=None))
+ timestamp = parse_iso8601(self._html_search_meta(
+ 'uploadDate', webpage, 'upload date', default=None), ' ')
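+ # The uploadDate meta separates date and time with a space, hence the
+ # custom delimiter passed to parse_iso8601.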
+
+ view_count = int_or_none(self._search_regex(
+ r'UserPlays:(\d+)', webpage, 'view count', default=None))
+ comment_count = int_or_none(self._search_regex(
+ r'UserComments:(\d+)', webpage, 'comment count', default=None))
+
+ formats = []
+ for source in item.findall(ns('./jwplayer:source')):
+ format_id = source.attrib['label']
+ f = {
+ 'url': source.attrib['file'],
+ 'format_id': format_id,
+ }
+ m = re.search(r'^(?P<height>\d+)[pP]', format_id)
+ if m:
+ f['height'] = int(m.group('height'))
+ formats.append(f)
+ self._sort_formats(formats)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ }
diff --git a/youtube_dl/extractor/zaq1.py b/youtube_dlc/extractor/zaq1.py
index 889aff5d8..889aff5d8 100644
--- a/youtube_dl/extractor/zaq1.py
+++ b/youtube_dlc/extractor/zaq1.py
diff --git a/youtube_dl/extractor/zattoo.py b/youtube_dlc/extractor/zattoo.py
index 6bac3026e..6bac3026e 100644
--- a/youtube_dl/extractor/zattoo.py
+++ b/youtube_dlc/extractor/zattoo.py
diff --git a/youtube_dlc/extractor/zdf.py b/youtube_dlc/extractor/zdf.py
new file mode 100644
index 000000000..7b5ad4a6e
--- /dev/null
+++ b/youtube_dlc/extractor/zdf.py
@@ -0,0 +1,332 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+ determine_ext,
+ int_or_none,
+ NO_DEFAULT,
+ orderedSet,
+ parse_codecs,
+ qualities,
+ try_get,
+ unified_timestamp,
+ update_url_query,
+ url_or_none,
+ urljoin,
+)
+
+
+class ZDFBaseIE(InfoExtractor):
+ def _call_api(self, url, player, referrer, video_id, item):
+ return self._download_json(
+ url, video_id, 'Downloading JSON %s' % item,
+ headers={
+ 'Referer': referrer,
+ 'Api-Auth': 'Bearer %s' % player['apiToken'],
+ })
+
+ def _extract_player(self, webpage, video_id, fatal=True):
+ return self._parse_json(
+ self._search_regex(
+ r'(?s)data-zdfplayer-jsb=(["\'])(?P<json>{.+?})\1', webpage,
+ 'player JSON', default='{}' if not fatal else NO_DEFAULT,
+ group='json'),
+ video_id)
+
+
+class ZDFIE(ZDFBaseIE):
+ IE_NAME = 'ZDF-3sat'
+ _VALID_URL = r'https?://www\.(zdf|3sat)\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html'
+ _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh')
+ _GEO_COUNTRIES = ['DE']
+
+ _TESTS = [{
+ 'url': 'https://www.3sat.de/wissen/wissenschaftsdoku/luxusgut-lebensraum-100.html',
+ 'info_dict': {
+ 'id': 'luxusgut-lebensraum-100',
+ 'ext': 'mp4',
+ 'title': 'Luxusgut Lebensraum',
+ 'description': 'md5:5c09b2f45ac3bc5233d1b50fc543d061',
+ 'duration': 2601,
+ 'timestamp': 1566497700,
+ 'upload_date': '20190822',
+ }
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
+ 'info_dict': {
+ 'id': 'die-magie-der-farben-von-koenigspurpur-und-jeansblau-100',
+ 'ext': 'mp4',
+ 'title': 'Die Magie der Farben (2/2)',
+ 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
+ 'duration': 2615,
+ 'timestamp': 1465021200,
+ 'upload_date': '20160604',
+ },
+ }, {
+ 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
+ 'only_matching': True,
+ }]
+
+ @staticmethod
+ def _extract_subtitles(src):
+ subtitles = {}
+ for caption in try_get(src, lambda x: x['captions'], list) or []:
+ subtitle_url = url_or_none(caption.get('uri'))
+ if subtitle_url:
+ lang = caption.get('language', 'deu')
+ subtitles.setdefault(lang, []).append({
+ 'url': subtitle_url,
+ })
+ return subtitles
+
+ def _extract_format(self, video_id, formats, format_urls, meta):
+ format_url = url_or_none(meta.get('url'))
+ if not format_url:
+ return
+ if format_url in format_urls:
+ return
+ format_urls.add(format_url)
+ mime_type = meta.get('mimeType')
+ ext = determine_ext(format_url)
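+ # Dispatch on manifest type: HLS and HDS manifests expand into several
+ # formats; anything else is treated as a single progressive download.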
+ if mime_type == 'application/x-mpegURL' or ext == 'm3u8':
+ formats.extend(self._extract_m3u8_formats(
+ format_url, video_id, 'mp4', m3u8_id='hls',
+ entry_protocol='m3u8_native', fatal=False))
+ elif mime_type == 'application/f4m+xml' or ext == 'f4m':
+ formats.extend(self._extract_f4m_formats(
+ update_url_query(format_url, {'hdcore': '3.7.0'}), video_id, f4m_id='hds', fatal=False))
+ else:
+ f = parse_codecs(meta.get('mimeCodec'))
+ format_id = ['http']
+ for p in (meta.get('type'), meta.get('quality')):
+ if p and isinstance(p, compat_str):
+ format_id.append(p)
+ f.update({
+ 'url': format_url,
+ 'format_id': '-'.join(format_id),
+ 'format_note': meta.get('quality'),
+ 'language': meta.get('language'),
+ 'quality': qualities(self._QUALITIES)(meta.get('quality')),
+ 'preference': -10,
+ })
+ formats.append(f)
+
+ def _extract_entry(self, url, player, content, video_id):
+ title = content.get('title') or content['teaserHeadline']
+
+ t = content['mainVideoContent']['http://zdf.de/rels/target']
+
+ ptmd_path = t.get('http://zdf.de/rels/streams/ptmd')
+
+ if not ptmd_path:
+ ptmd_path = t[
+ 'http://zdf.de/rels/streams/ptmd-template'].replace(
+ '{playerId}', 'portal')
+
+ ptmd = self._call_api(
+ urljoin(url, ptmd_path), player, url, video_id, 'metadata')
+
+ formats = []
+ track_uris = set()
+ for p in ptmd['priorityList']:
+ formitaeten = p.get('formitaeten')
+ if not isinstance(formitaeten, list):
+ continue
+ for f in formitaeten:
+ f_qualities = f.get('qualities')
+ if not isinstance(f_qualities, list):
+ continue
+ for quality in f_qualities:
+ tracks = try_get(quality, lambda x: x['audio']['tracks'], list)
+ if not tracks:
+ continue
+ for track in tracks:
+ self._extract_format(
+ video_id, formats, track_uris, {
+ 'url': track.get('uri'),
+ 'type': f.get('type'),
+ 'mimeType': f.get('mimeType'),
+ 'quality': quality.get('quality'),
+ 'language': track.get('language'),
+ })
+ self._sort_formats(formats)
+
+ thumbnails = []
+ layouts = try_get(
+ content, lambda x: x['teaserImageRef']['layouts'], dict)
+ if layouts:
+ for layout_key, layout_url in layouts.items():
+ layout_url = url_or_none(layout_url)
+ if not layout_url:
+ continue
+ thumbnail = {
+ 'url': layout_url,
+ 'format_id': layout_key,
+ }
+ mobj = re.search(r'(?P<width>\d+)x(?P<height>\d+)', layout_key)
+ if mobj:
+ thumbnail.update({
+ 'width': int(mobj.group('width')),
+ 'height': int(mobj.group('height')),
+ })
+ thumbnails.append(thumbnail)
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': content.get('leadParagraph') or content.get('teasertext'),
+ 'duration': int_or_none(t.get('duration')),
+ 'timestamp': unified_timestamp(content.get('editorialDate')),
+ 'thumbnails': thumbnails,
+ 'subtitles': self._extract_subtitles(ptmd),
+ 'formats': formats,
+ }
+
+ def _extract_regular(self, url, player, video_id):
+ content = self._call_api(
+ player['content'], player, url, video_id, 'content')
+ return self._extract_entry(player['content'], player, content, video_id)
+
+ def _extract_mobile(self, video_id):
+ document = self._download_json(
+ 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id,
+ video_id)['document']
+
+ title = document['titel']
+
+ formats = []
+ format_urls = set()
+ for f in document['formitaeten']:
+ self._extract_format(video_id, formats, format_urls, f)
+ self._sort_formats(formats)
+
+ thumbnails = []
+ teaser_bild = document.get('teaserBild')
+ if isinstance(teaser_bild, dict):
+ for thumbnail_key, thumbnail in teaser_bild.items():
+ thumbnail_url = try_get(
+ thumbnail, lambda x: x['url'], compat_str)
+ if thumbnail_url:
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'id': thumbnail_key,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': document.get('beschreibung'),
+ 'duration': int_or_none(document.get('length')),
+ 'timestamp': unified_timestamp(try_get(
+ document, lambda x: x['meta']['editorialDate'], compat_str)),
+ 'thumbnails': thumbnails,
+ 'subtitles': self._extract_subtitles(document),
+ 'formats': formats,
+ }
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id, fatal=False)
+ if webpage:
+ player = self._extract_player(webpage, url, fatal=False)
+ if player:
+ return self._extract_regular(url, player, video_id)
+
+ return self._extract_mobile(video_id)
+
+
+class ZDFChannelIE(ZDFBaseIE):
+ _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
+ _TESTS = [{
+ 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
+ 'info_dict': {
+ 'id': 'das-aktuelle-sportstudio',
+ 'title': 'das aktuelle sportstudio | ZDF',
+ },
+ 'playlist_mincount': 23,
+ }, {
+ 'url': 'https://www.zdf.de/dokumentation/planet-e',
+ 'info_dict': {
+ 'id': 'planet-e',
+ 'title': 'planet e.',
+ },
+ 'playlist_mincount': 50,
+ }, {
+ 'url': 'https://www.zdf.de/filme/taunuskrimi/',
+ 'only_matching': True,
+ }]
+
+ @classmethod
+ def suitable(cls, url):
+ return False if ZDFIE.suitable(url) else super(ZDFChannelIE, cls).suitable(url)
+
+ def _real_extract(self, url):
+ channel_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, channel_id)
+
+ entries = [
+ self.url_result(item_url, ie=ZDFIE.ie_key())
+ for item_url in orderedSet(re.findall(
+ r'data-plusbar-url=["\'](http.+?\.html)', webpage))]
+
+ return self.playlist_result(
+ entries, channel_id, self._og_search_title(webpage, fatal=False))
+
+ r"""
+ player = self._extract_player(webpage, channel_id)
+
+ channel_id = self._search_regex(
+ r'docId\s*:\s*(["\'])(?P<id>(?!\1).+?)\1', webpage,
+ 'channel id', group='id')
+
+ channel = self._call_api(
+ 'https://api.zdf.de/content/documents/%s.json' % channel_id,
+ player, url, channel_id)
+
+ items = []
+ for module in channel['module']:
+ for teaser in try_get(module, lambda x: x['teaser'], list) or []:
+ t = try_get(
+ teaser, lambda x: x['http://zdf.de/rels/target'], dict)
+ if not t:
+ continue
+ items.extend(try_get(
+ t,
+ lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
+ list) or [])
+ items.extend(try_get(
+ module,
+ lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
+ list) or [])
+
+ entries = []
+ entry_urls = set()
+ for item in items:
+ t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
+ if not t:
+ continue
+ sharing_url = t.get('http://zdf.de/rels/sharing-url')
+ if not sharing_url or not isinstance(sharing_url, compat_str):
+ continue
+ if sharing_url in entry_urls:
+ continue
+ entry_urls.add(sharing_url)
+ entries.append(self.url_result(
+ sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
+
+ return self.playlist_result(entries, channel_id, channel.get('title'))
+ """
diff --git a/youtube_dl/extractor/zingmp3.py b/youtube_dlc/extractor/zingmp3.py
index adfdcaabf..adfdcaabf 100644
--- a/youtube_dl/extractor/zingmp3.py
+++ b/youtube_dlc/extractor/zingmp3.py
diff --git a/youtube_dlc/extractor/zype.py b/youtube_dlc/extractor/zype.py
new file mode 100644
index 000000000..2e2e97a0c
--- /dev/null
+++ b/youtube_dlc/extractor/zype.py
@@ -0,0 +1,134 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_HTTPError
+from ..utils import (
+ dict_get,
+ ExtractorError,
+ int_or_none,
+ js_to_json,
+ parse_iso8601,
+)
+
+
+class ZypeIE(InfoExtractor):
+ _ID_RE = r'[\da-fA-F]+'
+ _COMMON_RE = r'//player\.zype\.com/embed/%s\.(?:js|json|html)\?.*?(?:access_token|(?:ap[ip]|player)_key)='
+ _VALID_URL = r'https?:%s[^&]+' % (_COMMON_RE % ('(?P<id>%s)' % _ID_RE))
+ _TEST = {
+ 'url': 'https://player.zype.com/embed/5b400b834b32992a310622b9.js?api_key=jZ9GUhRmxcPvX7M3SlfejB6Hle9jyHTdk2jVxG7wOHPLODgncEKVdPYBhuz9iWXQ&autoplay=false&controls=true&da=false',
+ 'md5': 'eaee31d474c76a955bdaba02a505c595',
+ 'info_dict': {
+ 'id': '5b400b834b32992a310622b9',
+ 'ext': 'mp4',
+ 'title': 'Smoky Barbecue Favorites',
+ 'thumbnail': r're:^https?://.*\.jpe?g',
+ 'description': 'md5:5ff01e76316bd8d46508af26dc86023b',
+ 'timestamp': 1504915200,
+ 'upload_date': '20170909',
+ },
+ }
+
+ @staticmethod
+ def _extract_urls(webpage):
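+        # Find Zype player <script> embeds in an arbitrary webpage so that
+        # other extractors can delegate matching URLs to this one.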
+ return [
+ mobj.group('url')
+ for mobj in re.finditer(
+ r'<script[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.+?)\1' % (ZypeIE._COMMON_RE % ZypeIE._ID_RE),
+ webpage)]
+
+ def _real_extract(self, url):
+ video_id = self._match_id(url)
+
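+        # The .js/.html embed URL has a JSON counterpart at the same path;
+        # request that to get structured data.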
+ try:
+ response = self._download_json(re.sub(
+ r'\.(?:js|html)\?', '.json?', url), video_id)['response']
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 401, 403):
+ raise ExtractorError(self._parse_json(
+ e.cause.read().decode(), video_id)['message'], expected=True)
+ raise
+
+ body = response['body']
+ video = response['video']
+ title = video['title']
+
+ if isinstance(body, dict):
+ formats = []
+ for output in body.get('outputs', []):
+ output_url = output.get('url')
+ if not output_url:
+ continue
+ name = output.get('name')
+ if name == 'm3u8':
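+                    # Note: the HLS rendition set replaces any progressive
+                    # formats collected so far.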
+ formats = self._extract_m3u8_formats(
+ output_url, video_id, 'mp4',
+ 'm3u8_native', m3u8_id='hls', fatal=False)
+ else:
+ f = {
+ 'format_id': name,
+ 'tbr': int_or_none(output.get('bitrate')),
+ 'url': output_url,
+ }
+ if name in ('m4a', 'mp3'):
+ f['vcodec'] = 'none'
+ else:
+ f.update({
+ 'height': int_or_none(output.get('height')),
+ 'width': int_or_none(output.get('width')),
+ })
+ formats.append(f)
+ text_tracks = body.get('subtitles') or []
+ else:
+ m3u8_url = self._search_regex(
+ r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
+ body, 'm3u8 url', group='url')
+ formats = self._extract_m3u8_formats(
+ m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+ text_tracks = self._search_regex(
+ r'textTracks\s*:\s*(\[[^]]+\])',
+ body, 'text tracks', default=None)
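+            # textTracks is embedded as a JavaScript literal; js_to_json
+            # normalizes it and fatal=False tolerates parse failures.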
+ if text_tracks:
+ text_tracks = self._parse_json(
+ text_tracks, video_id, js_to_json, False)
+ self._sort_formats(formats)
+
+ subtitles = {}
+ if text_tracks:
+ for text_track in text_tracks:
+ tt_url = dict_get(text_track, ('file', 'src'))
+ if not tt_url:
+ continue
+ subtitles.setdefault(text_track.get('label') or 'English', []).append({
+ 'url': tt_url,
+ })
+
+ thumbnails = []
+ for thumbnail in video.get('thumbnails', []):
+ thumbnail_url = thumbnail.get('url')
+ if not thumbnail_url:
+ continue
+ thumbnails.append({
+ 'url': thumbnail_url,
+ 'width': int_or_none(thumbnail.get('width')),
+ 'height': int_or_none(thumbnail.get('height')),
+ })
+
+ return {
+ 'id': video_id,
+ 'display_id': video.get('friendly_title'),
+ 'title': title,
+ 'thumbnails': thumbnails,
+ 'description': dict_get(video, ('description', 'ott_description', 'short_description')),
+ 'timestamp': parse_iso8601(video.get('published_at')),
+ 'duration': int_or_none(video.get('duration')),
+ 'view_count': int_or_none(video.get('request_count')),
+ 'average_rating': int_or_none(video.get('rating')),
+ 'season_number': int_or_none(video.get('season')),
+ 'episode_number': int_or_none(video.get('episode')),
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }
diff --git a/youtube_dl/jsinterp.py b/youtube_dlc/jsinterp.py
index 7bda59610..7bda59610 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dlc/jsinterp.py
diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py
new file mode 100644
index 000000000..2cc5eee74
--- /dev/null
+++ b/youtube_dlc/options.py
@@ -0,0 +1,916 @@
+from __future__ import unicode_literals
+
+import os.path
+import optparse
+import re
+import sys
+
+from .downloader.external import list_external_downloaders
+from .compat import (
+ compat_expanduser,
+ compat_get_terminal_size,
+ compat_getenv,
+ compat_kwargs,
+ compat_shlex_split,
+)
+from .utils import (
+ preferredencoding,
+ write_string,
+)
+from .version import __version__
+
+
+def _hide_login_info(opts):
+ PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
+ eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
+
+ def _scrub_eq(o):
+ m = eqre.match(o)
+ if m:
+ return m.group('key') + '=PRIVATE'
+ else:
+ return o
+
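+    # Scrub both "--password=secret" and "--password secret" forms, e.g.
+    # ['-u', 'me', '--password=pw'] -> ['-u', 'PRIVATE', '--password=PRIVATE'].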
+ opts = list(map(_scrub_eq, opts))
+ for idx, opt in enumerate(opts):
+ if opt in PRIVATE_OPTS and idx + 1 < len(opts):
+ opts[idx + 1] = 'PRIVATE'
+ return opts
+
+
+def parseOpts(overrideArguments=None):
+ def _readOptions(filename_bytes, default=[]):
+ try:
+ optionf = open(filename_bytes)
+ except IOError:
+ return default # silently skip if file is not present
+ try:
+ # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
+ contents = optionf.read()
+ if sys.version_info < (3,):
+ contents = contents.decode(preferredencoding())
+ res = compat_shlex_split(contents, comments=True)
+ finally:
+ optionf.close()
+ return res
+
+ def _readUserConf():
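+        # Look for a user config in $XDG_CONFIG_HOME (or ~/.config), then in
+        # %APPDATA% on Windows, then in plain files in the home directory.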
+ xdg_config_home = compat_getenv('XDG_CONFIG_HOME')
+ if xdg_config_home:
+ userConfFile = os.path.join(xdg_config_home, 'youtube-dlc', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(xdg_config_home, 'youtube-dlc.conf')
+ else:
+ userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dlc', 'config')
+ if not os.path.isfile(userConfFile):
+ userConfFile = os.path.join(compat_expanduser('~'), '.config', 'youtube-dlc.conf')
+ userConf = _readOptions(userConfFile, None)
+
+ if userConf is None:
+ appdata_dir = compat_getenv('appdata')
+ if appdata_dir:
+ userConf = _readOptions(
+ os.path.join(appdata_dir, 'youtube-dlc', 'config'),
+ default=None)
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(appdata_dir, 'youtube-dlc', 'config.txt'),
+ default=None)
+
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(compat_expanduser('~'), 'youtube-dlc.conf'),
+ default=None)
+ if userConf is None:
+ userConf = _readOptions(
+ os.path.join(compat_expanduser('~'), 'youtube-dlc.conf.txt'),
+ default=None)
+
+ if userConf is None:
+ userConf = []
+
+ return userConf
+
+ def _format_option_string(option):
+        ''' ('-o', '--option') -> -o, --option METAVAR'''
+
+ opts = []
+
+ if option._short_opts:
+ opts.append(option._short_opts[0])
+ if option._long_opts:
+ opts.append(option._long_opts[0])
+ if len(opts) > 1:
+ opts.insert(1, ', ')
+
+ if option.takes_value():
+ opts.append(' %s' % option.metavar)
+
+ return ''.join(opts)
+
+ def _comma_separated_values_options_callback(option, opt_str, value, parser):
+ setattr(parser.values, option.dest, value.split(','))
+
+ # No need to wrap help messages if we're on a wide console
+ columns = compat_get_terminal_size().columns
+ max_width = columns if columns else 80
+ max_help_position = 80
+
+ fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
+ fmt.format_option_strings = _format_option_string
+
+ kw = {
+ 'version': __version__,
+ 'formatter': fmt,
+ 'usage': '%prog [OPTIONS] URL [URL...]',
+ 'conflict_handler': 'resolve',
+ }
+
+ parser = optparse.OptionParser(**compat_kwargs(kw))
+
+ general = optparse.OptionGroup(parser, 'General Options')
+ general.add_option(
+ '-h', '--help',
+ action='help',
+ help='Print this help text and exit')
+ general.add_option(
+ '--version',
+ action='version',
+ help='Print program version and exit')
+ general.add_option(
+ '-U', '--update',
+ action='store_true', dest='update_self',
+        help='Update this program to the latest version. Make sure that you have sufficient permissions (run with sudo if needed)')
+ general.add_option(
+ '-i', '--ignore-errors',
+ action='store_true', dest='ignoreerrors', default=False,
+ help='Continue on download errors, for example to skip unavailable videos in a playlist')
+ general.add_option(
+ '--abort-on-error',
+ action='store_false', dest='ignoreerrors',
+ help='Abort downloading of further videos (in the playlist or the command line) if an error occurs')
+ general.add_option(
+ '--dump-user-agent',
+ action='store_true', dest='dump_user_agent', default=False,
+ help='Display the current browser identification')
+ general.add_option(
+ '--list-extractors',
+ action='store_true', dest='list_extractors', default=False,
+ help='List all supported extractors')
+ general.add_option(
+ '--extractor-descriptions',
+ action='store_true', dest='list_extractor_descriptions', default=False,
+ help='Output descriptions of all supported extractors')
+ general.add_option(
+ '--force-generic-extractor',
+ action='store_true', dest='force_generic_extractor', default=False,
+ help='Force extraction to use the generic extractor')
+ general.add_option(
+ '--default-search',
+ dest='default_search', metavar='PREFIX',
+        help='Use this prefix for unqualified URLs. For example, with "gvsearch2:" the command youtube-dlc "large apple" downloads two videos from google videos for the query "large apple". Use the value "auto" to let youtube-dlc guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching.')
+ general.add_option(
+ '--ignore-config',
+ action='store_true',
+ help='Do not read configuration files. '
+ 'When given in the global configuration file /etc/youtube-dlc.conf: '
+ 'Do not read the user configuration in ~/.config/youtube-dlc/config '
+ '(%APPDATA%/youtube-dlc/config.txt on Windows)')
+ general.add_option(
+ '--config-location',
+ dest='config_location', metavar='PATH',
+ help='Location of the configuration file; either the path to the config or its containing directory.')
+ general.add_option(
+ '--flat-playlist',
+ action='store_const', dest='extract_flat', const='in_playlist',
+ default=False,
+ help='Do not extract the videos of a playlist, only list them.')
+ general.add_option(
+ '--mark-watched',
+ action='store_true', dest='mark_watched', default=False,
+ help='Mark videos watched (YouTube only)')
+ general.add_option(
+ '--no-mark-watched',
+ action='store_false', dest='mark_watched', default=False,
+ help='Do not mark videos watched (YouTube only)')
+ general.add_option(
+ '--no-color', '--no-colors',
+ action='store_true', dest='no_color',
+ default=False,
+ help='Do not emit color codes in output')
+
+ network = optparse.OptionGroup(parser, 'Network Options')
+ network.add_option(
+ '--proxy', dest='proxy',
+ default=None, metavar='URL',
+ help='Use the specified HTTP/HTTPS/SOCKS proxy. To enable '
+ 'SOCKS proxy, specify a proper scheme. For example '
+ 'socks5://127.0.0.1:1080/. Pass in an empty string (--proxy "") '
+ 'for direct connection')
+ network.add_option(
+ '--socket-timeout',
+ dest='socket_timeout', type=float, default=None, metavar='SECONDS',
+ help='Time to wait before giving up, in seconds')
+ network.add_option(
+ '--source-address',
+ metavar='IP', dest='source_address', default=None,
+ help='Client-side IP address to bind to',
+ )
+ network.add_option(
+ '-4', '--force-ipv4',
+ action='store_const', const='0.0.0.0', dest='source_address',
+ help='Make all connections via IPv4',
+ )
+ network.add_option(
+ '-6', '--force-ipv6',
+ action='store_const', const='::', dest='source_address',
+ help='Make all connections via IPv6',
+ )
+
+ geo = optparse.OptionGroup(parser, 'Geo Restriction')
+ geo.add_option(
+ '--geo-verification-proxy',
+ dest='geo_verification_proxy', default=None, metavar='URL',
+ help='Use this proxy to verify the IP address for some geo-restricted sites. '
+ 'The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading.')
+ geo.add_option(
+ '--cn-verification-proxy',
+ dest='cn_verification_proxy', default=None, metavar='URL',
+ help=optparse.SUPPRESS_HELP)
+ geo.add_option(
+ '--geo-bypass',
+ action='store_true', dest='geo_bypass', default=True,
+ help='Bypass geographic restriction via faking X-Forwarded-For HTTP header')
+ geo.add_option(
+ '--no-geo-bypass',
+ action='store_false', dest='geo_bypass', default=True,
+ help='Do not bypass geographic restriction via faking X-Forwarded-For HTTP header')
+ geo.add_option(
+ '--geo-bypass-country', metavar='CODE',
+ dest='geo_bypass_country', default=None,
+        help='Force bypass geographic restriction with explicitly provided two-letter ISO 3166-1 alpha-2 country code')
+ geo.add_option(
+ '--geo-bypass-ip-block', metavar='IP_BLOCK',
+ dest='geo_bypass_ip_block', default=None,
+ help='Force bypass geographic restriction with explicitly provided IP block in CIDR notation')
+
+ selection = optparse.OptionGroup(parser, 'Video Selection')
+ selection.add_option(
+ '--playlist-start',
+ dest='playliststart', metavar='NUMBER', default=1, type=int,
+ help='Playlist video to start at (default is %default)')
+ selection.add_option(
+ '--playlist-end',
+ dest='playlistend', metavar='NUMBER', default=None, type=int,
+ help='Playlist video to end at (default is last)')
+ selection.add_option(
+ '--playlist-items',
+ dest='playlist_items', metavar='ITEM_SPEC', default=None,
+        help='Playlist video items to download. Specify indices of the videos in the playlist separated by commas like: "--playlist-items 1,2,5,8" if you want to download videos indexed 1, 2, 5, 8 in the playlist. You can specify a range: "--playlist-items 1-3,7,10-13" will download the videos at indices 1, 2, 3, 7, 10, 11, 12 and 13.')
+ selection.add_option(
+ '--match-title',
+ dest='matchtitle', metavar='REGEX',
+ help='Download only matching titles (regex or caseless sub-string)')
+ selection.add_option(
+ '--reject-title',
+ dest='rejecttitle', metavar='REGEX',
+ help='Skip download for matching titles (regex or caseless sub-string)')
+ selection.add_option(
+ '--max-downloads',
+ dest='max_downloads', metavar='NUMBER', type=int, default=None,
+ help='Abort after downloading NUMBER files')
+ selection.add_option(
+ '--min-filesize',
+ metavar='SIZE', dest='min_filesize', default=None,
+ help='Do not download any videos smaller than SIZE (e.g. 50k or 44.6m)')
+ selection.add_option(
+ '--max-filesize',
+ metavar='SIZE', dest='max_filesize', default=None,
+ help='Do not download any videos larger than SIZE (e.g. 50k or 44.6m)')
+ selection.add_option(
+ '--date',
+ metavar='DATE', dest='date', default=None,
+        help='Download only videos uploaded on this date')
+ selection.add_option(
+ '--datebefore',
+ metavar='DATE', dest='datebefore', default=None,
+ help='Download only videos uploaded on or before this date (i.e. inclusive)')
+ selection.add_option(
+ '--dateafter',
+ metavar='DATE', dest='dateafter', default=None,
+ help='Download only videos uploaded on or after this date (i.e. inclusive)')
+ selection.add_option(
+ '--min-views',
+ metavar='COUNT', dest='min_views', default=None, type=int,
+        help='Do not download any videos with fewer than COUNT views')
+ selection.add_option(
+ '--max-views',
+ metavar='COUNT', dest='max_views', default=None, type=int,
+ help='Do not download any videos with more than COUNT views')
+ selection.add_option(
+ '--match-filter',
+ metavar='FILTER', dest='match_filter', default=None,
+ help=(
+ 'Generic video filter. '
+ 'Specify any key (see the "OUTPUT TEMPLATE" for a list of available keys) to '
+ 'match if the key is present, '
+ '!key to check if the key is not present, '
+ 'key > NUMBER (like "comment_count > 12", also works with '
+ '>=, <, <=, !=, =) to compare against a number, '
+ 'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
+ 'to match against a string literal '
+ 'and & to require multiple matches. '
+ 'Values which are not known are excluded unless you '
+ 'put a question mark (?) after the operator. '
+ 'For example, to only match videos that have been liked more than '
+ '100 times and disliked less than 50 times (or the dislike '
+ 'functionality is not available at the given service), but who '
+ 'also have a description, use --match-filter '
+            '"like_count > 100 & dislike_count <? 50 & description".'
+ ))
+ selection.add_option(
+ '--no-playlist',
+ action='store_true', dest='noplaylist', default=False,
+ help='Download only the video, if the URL refers to a video and a playlist.')
+ selection.add_option(
+ '--yes-playlist',
+ action='store_false', dest='noplaylist', default=False,
+ help='Download the playlist, if the URL refers to a video and a playlist.')
+ selection.add_option(
+ '--age-limit',
+ metavar='YEARS', dest='age_limit', default=None, type=int,
+ help='Download only videos suitable for the given age')
+ selection.add_option(
+ '--download-archive', metavar='FILE',
+ dest='download_archive',
+ help='Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it.')
+ selection.add_option(
+ '--include-ads',
+ dest='include_ads', action='store_true',
+ help='Download advertisements as well (experimental)')
+
+ authentication = optparse.OptionGroup(parser, 'Authentication Options')
+ authentication.add_option(
+ '-u', '--username',
+ dest='username', metavar='USERNAME',
+ help='Login with this account ID')
+ authentication.add_option(
+ '-p', '--password',
+ dest='password', metavar='PASSWORD',
+ help='Account password. If this option is left out, youtube-dlc will ask interactively.')
+ authentication.add_option(
+ '-2', '--twofactor',
+ dest='twofactor', metavar='TWOFACTOR',
+ help='Two-factor authentication code')
+ authentication.add_option(
+ '-n', '--netrc',
+ action='store_true', dest='usenetrc', default=False,
+ help='Use .netrc authentication data')
+ authentication.add_option(
+ '--video-password',
+ dest='videopassword', metavar='PASSWORD',
+ help='Video password (vimeo, smotri, youku)')
+
+ adobe_pass = optparse.OptionGroup(parser, 'Adobe Pass Options')
+ adobe_pass.add_option(
+ '--ap-mso',
+ dest='ap_mso', metavar='MSO',
+ help='Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs')
+ adobe_pass.add_option(
+ '--ap-username',
+ dest='ap_username', metavar='USERNAME',
+ help='Multiple-system operator account login')
+ adobe_pass.add_option(
+ '--ap-password',
+ dest='ap_password', metavar='PASSWORD',
+ help='Multiple-system operator account password. If this option is left out, youtube-dlc will ask interactively.')
+ adobe_pass.add_option(
+ '--ap-list-mso',
+ action='store_true', dest='ap_list_mso', default=False,
+ help='List all supported multiple-system operators')
+
+ video_format = optparse.OptionGroup(parser, 'Video Format Options')
+ video_format.add_option(
+ '-f', '--format',
+ action='store', dest='format', metavar='FORMAT', default=None,
+ help='Video format code, see the "FORMAT SELECTION" for all the info')
+ video_format.add_option(
+ '--all-formats',
+ action='store_const', dest='format', const='all',
+ help='Download all available video formats')
+ video_format.add_option(
+ '--prefer-free-formats',
+ action='store_true', dest='prefer_free_formats', default=False,
+ help='Prefer free video formats unless a specific one is requested')
+ video_format.add_option(
+ '-F', '--list-formats',
+ action='store_true', dest='listformats',
+ help='List all available formats of requested videos')
+ video_format.add_option(
+ '--youtube-include-dash-manifest',
+ action='store_true', dest='youtube_include_dash_manifest', default=True,
+ help=optparse.SUPPRESS_HELP)
+ video_format.add_option(
+ '--youtube-skip-dash-manifest',
+ action='store_false', dest='youtube_include_dash_manifest',
+ help='Do not download the DASH manifests and related data on YouTube videos')
+ video_format.add_option(
+ '--merge-output-format',
+ action='store', dest='merge_output_format', metavar='FORMAT', default=None,
+ help=(
+ 'If a merge is required (e.g. bestvideo+bestaudio), '
+ 'output to given container format. One of mkv, mp4, ogg, webm, flv. '
+ 'Ignored if no merge is required'))
+
+ subtitles = optparse.OptionGroup(parser, 'Subtitle Options')
+ subtitles.add_option(
+ '--write-sub', '--write-srt',
+ action='store_true', dest='writesubtitles', default=False,
+ help='Write subtitle file')
+ subtitles.add_option(
+ '--write-auto-sub', '--write-automatic-sub',
+ action='store_true', dest='writeautomaticsub', default=False,
+ help='Write automatically generated subtitle file (YouTube only)')
+ subtitles.add_option(
+ '--all-subs',
+ action='store_true', dest='allsubtitles', default=False,
+ help='Download all the available subtitles of the video')
+ subtitles.add_option(
+ '--list-subs',
+ action='store_true', dest='listsubtitles', default=False,
+ help='List all available subtitles for the video')
+ subtitles.add_option(
+ '--sub-format',
+ action='store', dest='subtitlesformat', metavar='FORMAT', default='best',
+ help='Subtitle format, accepts formats preference, for example: "srt" or "ass/srt/best"')
+ subtitles.add_option(
+ '--sub-lang', '--sub-langs', '--srt-lang',
+ action='callback', dest='subtitleslangs', metavar='LANGS', type='str',
+ default=[], callback=_comma_separated_values_options_callback,
+ help='Languages of the subtitles to download (optional) separated by commas, use --list-subs for available language tags')
+
+ downloader = optparse.OptionGroup(parser, 'Download Options')
+ downloader.add_option(
+ '-r', '--limit-rate', '--rate-limit',
+ dest='ratelimit', metavar='RATE',
+ help='Maximum download rate in bytes per second (e.g. 50K or 4.2M)')
+ downloader.add_option(
+ '-R', '--retries',
+ dest='retries', metavar='RETRIES', default=10,
+ help='Number of retries (default is %default), or "infinite".')
+ downloader.add_option(
+ '--fragment-retries',
+ dest='fragment_retries', metavar='RETRIES', default=10,
+ help='Number of retries for a fragment (default is %default), or "infinite" (DASH, hlsnative and ISM)')
+ downloader.add_option(
+ '--skip-unavailable-fragments',
+ action='store_true', dest='skip_unavailable_fragments', default=True,
+ help='Skip unavailable fragments (DASH, hlsnative and ISM)')
+ downloader.add_option(
+ '--abort-on-unavailable-fragment',
+ action='store_false', dest='skip_unavailable_fragments',
+ help='Abort downloading when some fragment is not available')
+ downloader.add_option(
+ '--keep-fragments',
+ action='store_true', dest='keep_fragments', default=False,
+ help='Keep downloaded fragments on disk after downloading is finished; fragments are erased by default')
+ downloader.add_option(
+ '--buffer-size',
+ dest='buffersize', metavar='SIZE', default='1024',
+ help='Size of download buffer (e.g. 1024 or 16K) (default is %default)')
+ downloader.add_option(
+ '--no-resize-buffer',
+ action='store_true', dest='noresizebuffer', default=False,
+ help='Do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.')
+ downloader.add_option(
+ '--http-chunk-size',
+ dest='http_chunk_size', metavar='SIZE', default=None,
+ help='Size of a chunk for chunk-based HTTP downloading (e.g. 10485760 or 10M) (default is disabled). '
+ 'May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)')
+ downloader.add_option(
+ '--test',
+ action='store_true', dest='test', default=False,
+ help=optparse.SUPPRESS_HELP)
+ downloader.add_option(
+ '--playlist-reverse',
+ action='store_true',
+ help='Download playlist videos in reverse order')
+ downloader.add_option(
+ '--playlist-random',
+ action='store_true',
+ help='Download playlist videos in random order')
+ downloader.add_option(
+ '--xattr-set-filesize',
+ dest='xattr_set_filesize', action='store_true',
+ help='Set file xattribute ytdl.filesize with expected file size')
+ downloader.add_option(
+ '--hls-prefer-native',
+ dest='hls_prefer_native', action='store_true', default=None,
+ help='Use the native HLS downloader instead of ffmpeg')
+ downloader.add_option(
+ '--hls-prefer-ffmpeg',
+ dest='hls_prefer_native', action='store_false', default=None,
+ help='Use ffmpeg instead of the native HLS downloader')
+ downloader.add_option(
+ '--hls-use-mpegts',
+ dest='hls_use_mpegts', action='store_true',
+        help='Use the mpegts container for HLS videos, allowing the video to '
+             'be played while downloading (some players may not be able to play it)')
+ downloader.add_option(
+ '--external-downloader',
+ dest='external_downloader', metavar='COMMAND',
+ help='Use the specified external downloader. '
+ 'Currently supports %s' % ','.join(list_external_downloaders()))
+ downloader.add_option(
+ '--external-downloader-args',
+ dest='external_downloader_args', metavar='ARGS',
+ help='Give these arguments to the external downloader')
+
+ workarounds = optparse.OptionGroup(parser, 'Workarounds')
+ workarounds.add_option(
+ '--encoding',
+ dest='encoding', metavar='ENCODING',
+ help='Force the specified encoding (experimental)')
+ workarounds.add_option(
+ '--no-check-certificate',
+ action='store_true', dest='no_check_certificate', default=False,
+ help='Suppress HTTPS certificate validation')
+ workarounds.add_option(
+ '--prefer-insecure',
+ '--prefer-unsecure', action='store_true', dest='prefer_insecure',
+ help='Use an unencrypted connection to retrieve information about the video. (Currently supported only for YouTube)')
+ workarounds.add_option(
+ '--user-agent',
+ metavar='UA', dest='user_agent',
+ help='Specify a custom user agent')
+ workarounds.add_option(
+ '--referer',
+ metavar='URL', dest='referer', default=None,
+ help='Specify a custom referer, use if the video access is restricted to one domain',
+ )
+ workarounds.add_option(
+ '--add-header',
+ metavar='FIELD:VALUE', dest='headers', action='append',
+ help='Specify a custom HTTP header and its value, separated by a colon \':\'. You can use this option multiple times',
+ )
+ workarounds.add_option(
+ '--bidi-workaround',
+ dest='bidi_workaround', action='store_true',
+ help='Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH')
+ workarounds.add_option(
+ '--sleep-interval', '--min-sleep-interval', metavar='SECONDS',
+ dest='sleep_interval', type=float,
+ help=(
+ 'Number of seconds to sleep before each download when used alone '
+ 'or a lower bound of a range for randomized sleep before each download '
+ '(minimum possible number of seconds to sleep) when used along with '
+ '--max-sleep-interval.'))
+ workarounds.add_option(
+ '--max-sleep-interval', metavar='SECONDS',
+ dest='max_sleep_interval', type=float,
+ help=(
+ 'Upper bound of a range for randomized sleep before each download '
+ '(maximum possible number of seconds to sleep). Must only be used '
+ 'along with --min-sleep-interval.'))
+
+ verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
+ verbosity.add_option(
+ '-q', '--quiet',
+ action='store_true', dest='quiet', default=False,
+ help='Activate quiet mode')
+ verbosity.add_option(
+ '--no-warnings',
+ dest='no_warnings', action='store_true', default=False,
+ help='Ignore warnings')
+ verbosity.add_option(
+ '-s', '--simulate',
+ action='store_true', dest='simulate', default=False,
+ help='Do not download the video and do not write anything to disk')
+ verbosity.add_option(
+ '--skip-download',
+ action='store_true', dest='skip_download', default=False,
+ help='Do not download the video')
+ verbosity.add_option(
+ '-g', '--get-url',
+ action='store_true', dest='geturl', default=False,
+ help='Simulate, quiet but print URL')
+ verbosity.add_option(
+ '-e', '--get-title',
+ action='store_true', dest='gettitle', default=False,
+ help='Simulate, quiet but print title')
+ verbosity.add_option(
+ '--get-id',
+ action='store_true', dest='getid', default=False,
+ help='Simulate, quiet but print id')
+ verbosity.add_option(
+ '--get-thumbnail',
+ action='store_true', dest='getthumbnail', default=False,
+ help='Simulate, quiet but print thumbnail URL')
+ verbosity.add_option(
+ '--get-description',
+ action='store_true', dest='getdescription', default=False,
+ help='Simulate, quiet but print video description')
+ verbosity.add_option(
+ '--get-duration',
+ action='store_true', dest='getduration', default=False,
+ help='Simulate, quiet but print video length')
+ verbosity.add_option(
+ '--get-filename',
+ action='store_true', dest='getfilename', default=False,
+ help='Simulate, quiet but print output filename')
+ verbosity.add_option(
+ '--get-format',
+ action='store_true', dest='getformat', default=False,
+ help='Simulate, quiet but print output format')
+ verbosity.add_option(
+ '-j', '--dump-json',
+ action='store_true', dest='dumpjson', default=False,
+ help='Simulate, quiet but print JSON information. See the "OUTPUT TEMPLATE" for a description of available keys.')
+ verbosity.add_option(
+ '-J', '--dump-single-json',
+ action='store_true', dest='dump_single_json', default=False,
+ help='Simulate, quiet but print JSON information for each command-line argument. If the URL refers to a playlist, dump the whole playlist information in a single line.')
+ verbosity.add_option(
+ '--print-json',
+ action='store_true', dest='print_json', default=False,
+ help='Be quiet and print the video information as JSON (video is still being downloaded).',
+ )
+ verbosity.add_option(
+ '--newline',
+ action='store_true', dest='progress_with_newline', default=False,
+ help='Output progress bar as new lines')
+ verbosity.add_option(
+ '--no-progress',
+ action='store_true', dest='noprogress', default=False,
+ help='Do not print progress bar')
+ verbosity.add_option(
+ '--console-title',
+ action='store_true', dest='consoletitle', default=False,
+ help='Display progress in console titlebar')
+ verbosity.add_option(
+ '-v', '--verbose',
+ action='store_true', dest='verbose', default=False,
+ help='Print various debugging information')
+ verbosity.add_option(
+ '--dump-pages', '--dump-intermediate-pages',
+ action='store_true', dest='dump_intermediate_pages', default=False,
+ help='Print downloaded pages encoded using base64 to debug problems (very verbose)')
+ verbosity.add_option(
+ '--write-pages',
+ action='store_true', dest='write_pages', default=False,
+ help='Write downloaded intermediary pages to files in the current directory to debug problems')
+ verbosity.add_option(
+ '--youtube-print-sig-code',
+ action='store_true', dest='youtube_print_sig_code', default=False,
+ help=optparse.SUPPRESS_HELP)
+ verbosity.add_option(
+ '--print-traffic', '--dump-headers',
+ dest='debug_printtraffic', action='store_true', default=False,
+ help='Display sent and read HTTP traffic')
+ verbosity.add_option(
+ '-C', '--call-home',
+ dest='call_home', action='store_true', default=False,
+ help='Contact the youtube-dlc server for debugging')
+ verbosity.add_option(
+ '--no-call-home',
+ dest='call_home', action='store_false', default=False,
+ help='Do NOT contact the youtube-dlc server for debugging')
+
+ filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
+ filesystem.add_option(
+ '-a', '--batch-file',
+ dest='batchfile', metavar='FILE',
+ help="File containing URLs to download ('-' for stdin), one URL per line. "
+ "Lines starting with '#', ';' or ']' are considered as comments and ignored.")
+ filesystem.add_option(
+ '--id', default=False,
+ action='store_true', dest='useid', help='Use only video ID in file name')
+ filesystem.add_option(
+ '-o', '--output',
+ dest='outtmpl', metavar='TEMPLATE',
+ help=('Output filename template, see the "OUTPUT TEMPLATE" for all the info'))
+ filesystem.add_option(
+ '--autonumber-size',
+ dest='autonumber_size', metavar='NUMBER', type=int,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '--autonumber-start',
+ dest='autonumber_start', metavar='NUMBER', default=1, type=int,
+ help='Specify the start value for %(autonumber)s (default is %default)')
+ filesystem.add_option(
+ '--restrict-filenames',
+ action='store_true', dest='restrictfilenames', default=False,
+ help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames')
+ filesystem.add_option(
+ '-A', '--auto-number',
+ action='store_true', dest='autonumber', default=False,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-t', '--title',
+ action='store_true', dest='usetitle', default=False,
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-l', '--literal', default=False,
+ action='store_true', dest='usetitle',
+ help=optparse.SUPPRESS_HELP)
+ filesystem.add_option(
+ '-w', '--no-overwrites',
+ action='store_true', dest='nooverwrites', default=False,
+ help='Do not overwrite files')
+ filesystem.add_option(
+ '-c', '--continue',
+ action='store_true', dest='continue_dl', default=True,
+ help='Force resume of partially downloaded files. By default, youtube-dlc will resume downloads if possible.')
+ filesystem.add_option(
+ '--no-continue',
+ action='store_false', dest='continue_dl',
+ help='Do not resume partially downloaded files (restart from beginning)')
+ filesystem.add_option(
+ '--no-part',
+ action='store_true', dest='nopart', default=False,
+ help='Do not use .part files - write directly into output file')
+ filesystem.add_option(
+ '--no-mtime',
+ action='store_false', dest='updatetime', default=True,
+ help='Do not use the Last-modified header to set the file modification time')
+ filesystem.add_option(
+ '--write-description',
+ action='store_true', dest='writedescription', default=False,
+ help='Write video description to a .description file')
+ filesystem.add_option(
+ '--write-info-json',
+ action='store_true', dest='writeinfojson', default=False,
+ help='Write video metadata to a .info.json file')
+ filesystem.add_option(
+ '--write-annotations',
+ action='store_true', dest='writeannotations', default=False,
+ help='Write video annotations to a .annotations.xml file')
+ filesystem.add_option(
+ '--load-info-json', '--load-info',
+ dest='load_info_filename', metavar='FILE',
+ help='JSON file containing the video information (created with the "--write-info-json" option)')
+ filesystem.add_option(
+ '--cookies',
+ dest='cookiefile', metavar='FILE',
+ help='File to read cookies from and dump cookie jar in')
+ filesystem.add_option(
+ '--cache-dir', dest='cachedir', default=None, metavar='DIR',
+ help='Location in the filesystem where youtube-dlc can store some downloaded information permanently. By default $XDG_CACHE_HOME/youtube-dlc or ~/.cache/youtube-dlc . At the moment, only YouTube player files (for videos with obfuscated signatures) are cached, but that may change.')
+ filesystem.add_option(
+ '--no-cache-dir', action='store_const', const=False, dest='cachedir',
+ help='Disable filesystem caching')
+ filesystem.add_option(
+ '--rm-cache-dir',
+ action='store_true', dest='rm_cachedir',
+ help='Delete all filesystem cache files')
+
+ thumbnail = optparse.OptionGroup(parser, 'Thumbnail images')
+ thumbnail.add_option(
+ '--write-thumbnail',
+ action='store_true', dest='writethumbnail', default=False,
+ help='Write thumbnail image to disk')
+ thumbnail.add_option(
+ '--write-all-thumbnails',
+ action='store_true', dest='write_all_thumbnails', default=False,
+ help='Write all thumbnail image formats to disk')
+ thumbnail.add_option(
+ '--list-thumbnails',
+ action='store_true', dest='list_thumbnails', default=False,
+ help='Simulate and list all available thumbnail formats')
+
+ postproc = optparse.OptionGroup(parser, 'Post-processing Options')
+ postproc.add_option(
+ '-x', '--extract-audio',
+ action='store_true', dest='extractaudio', default=False,
+ help='Convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
+ postproc.add_option(
+ '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
+ help='Specify audio format: "best", "aac", "flac", "mp3", "m4a", "opus", "vorbis", or "wav"; "%default" by default; No effect without -x')
+ postproc.add_option(
+ '--audio-quality', metavar='QUALITY',
+ dest='audioquality', default='5',
+ help='Specify ffmpeg/avconv audio quality, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default %default)')
+ postproc.add_option(
+ '--recode-video',
+ metavar='FORMAT', dest='recodevideo', default=None,
+ help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm|mkv|avi)')
+ postproc.add_option(
+ '--postprocessor-args',
+ dest='postprocessor_args', metavar='ARGS',
+ help='Give these arguments to the postprocessor')
+ postproc.add_option(
+ '-k', '--keep-video',
+ action='store_true', dest='keepvideo', default=False,
+ help='Keep the video file on disk after the post-processing; the video is erased by default')
+ postproc.add_option(
+ '--no-post-overwrites',
+ action='store_true', dest='nopostoverwrites', default=False,
+ help='Do not overwrite post-processed files; the post-processed files are overwritten by default')
+ postproc.add_option(
+ '--embed-subs',
+ action='store_true', dest='embedsubtitles', default=False,
+ help='Embed subtitles in the video (only for mp4, webm and mkv videos)')
+ postproc.add_option(
+ '--embed-thumbnail',
+ action='store_true', dest='embedthumbnail', default=False,
+ help='Embed thumbnail in the audio as cover art')
+ postproc.add_option(
+ '--add-metadata',
+ action='store_true', dest='addmetadata', default=False,
+ help='Write metadata to the video file')
+ postproc.add_option(
+ '--metadata-from-title',
+ metavar='FORMAT', dest='metafromtitle',
+ help='Parse additional metadata like song title / artist from the video title. '
+ 'The format syntax is the same as --output. Regular expression with '
+ 'named capture groups may also be used. '
+ 'The parsed parameters replace existing values. '
+ 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
+ '"Coldplay - Paradise". '
+ 'Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"')
+ postproc.add_option(
+ '--xattrs',
+ action='store_true', dest='xattrs', default=False,
+ help='Write metadata to the video file\'s xattrs (using dublin core and xdg standards)')
+ postproc.add_option(
+ '--fixup',
+ metavar='POLICY', dest='fixup', default='detect_or_warn',
+ help='Automatically correct known faults of the file. '
+ 'One of never (do nothing), warn (only emit a warning), '
+ 'detect_or_warn (the default; fix file if we can, warn otherwise)')
+ postproc.add_option(
+ '--prefer-avconv',
+ action='store_false', dest='prefer_ffmpeg',
+ help='Prefer avconv over ffmpeg for running the postprocessors')
+ postproc.add_option(
+ '--prefer-ffmpeg',
+ action='store_true', dest='prefer_ffmpeg',
+ help='Prefer ffmpeg over avconv for running the postprocessors (default)')
+ postproc.add_option(
+ '--ffmpeg-location', '--avconv-location', metavar='PATH',
+ dest='ffmpeg_location',
+ help='Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory.')
+ postproc.add_option(
+ '--exec',
+ metavar='CMD', dest='exec_cmd',
+ help='Execute a command on the file after downloading and post-processing, similar to find\'s -exec syntax. Example: --exec \'adb push {} /sdcard/Music/ && rm {}\'')
+ postproc.add_option(
+ '--convert-subs', '--convert-subtitles',
+ metavar='FORMAT', dest='convertsubtitles', default=None,
+ help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
+
+ parser.add_option_group(general)
+ parser.add_option_group(network)
+ parser.add_option_group(geo)
+ parser.add_option_group(selection)
+ parser.add_option_group(downloader)
+ parser.add_option_group(filesystem)
+ parser.add_option_group(thumbnail)
+ parser.add_option_group(verbosity)
+ parser.add_option_group(workarounds)
+ parser.add_option_group(video_format)
+ parser.add_option_group(subtitles)
+ parser.add_option_group(authentication)
+ parser.add_option_group(adobe_pass)
+ parser.add_option_group(postproc)
+
+ if overrideArguments is not None:
+ opts, args = parser.parse_args(overrideArguments)
+ if opts.verbose:
+ write_string('[debug] Override config: ' + repr(overrideArguments) + '\n')
+ else:
+ def compat_conf(conf):
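+            # On Python 2, sys.argv entries are byte strings; decode them
+            # using the locale's preferred encoding.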
+ if sys.version_info < (3,):
+ return [a.decode(preferredencoding(), 'replace') for a in conf]
+ return conf
+
+ command_line_conf = compat_conf(sys.argv[1:])
+ opts, args = parser.parse_args(command_line_conf)
+
+ system_conf = user_conf = custom_conf = []
+
+ if '--config-location' in command_line_conf:
+ location = compat_expanduser(opts.config_location)
+ if os.path.isdir(location):
+ location = os.path.join(location, 'youtube-dlc.conf')
+ if not os.path.exists(location):
+ parser.error('config-location %s does not exist.' % location)
+ custom_conf = _readOptions(location)
+ elif '--ignore-config' in command_line_conf:
+ pass
+ else:
+ system_conf = _readOptions('/etc/youtube-dlc.conf')
+ if '--ignore-config' not in system_conf:
+ user_conf = _readUserConf()
+
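+            # Later sources take precedence: command-line arguments override
+            # the custom, user and system configs, in that order.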
+ argv = system_conf + user_conf + custom_conf + command_line_conf
+ opts, args = parser.parse_args(argv)
+ if opts.verbose:
+ for conf_label, conf in (
+ ('System config', system_conf),
+ ('User config', user_conf),
+ ('Custom config', custom_conf),
+ ('Command-line args', command_line_conf)):
+ write_string('[debug] %s: %s\n' % (conf_label, repr(_hide_login_info(conf))))
+
+ return parser, opts, args
diff --git a/youtube_dl/postprocessor/__init__.py b/youtube_dlc/postprocessor/__init__.py
index 3ea518399..3ea518399 100644
--- a/youtube_dl/postprocessor/__init__.py
+++ b/youtube_dlc/postprocessor/__init__.py
diff --git a/youtube_dl/postprocessor/common.py b/youtube_dlc/postprocessor/common.py
index 599dd1df2..599dd1df2 100644
--- a/youtube_dl/postprocessor/common.py
+++ b/youtube_dlc/postprocessor/common.py
diff --git a/youtube_dlc/postprocessor/embedthumbnail.py b/youtube_dlc/postprocessor/embedthumbnail.py
new file mode 100644
index 000000000..e2002ab0b
--- /dev/null
+++ b/youtube_dlc/postprocessor/embedthumbnail.py
@@ -0,0 +1,115 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+
+import os
+import subprocess
+
+from .ffmpeg import FFmpegPostProcessor
+
+from ..utils import (
+ check_executable,
+ encodeArgument,
+ encodeFilename,
+ PostProcessingError,
+ prepend_extension,
+ shell_quote
+)
+
+
+class EmbedThumbnailPPError(PostProcessingError):
+ pass
+
+
+class EmbedThumbnailPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, already_have_thumbnail=False):
+ super(EmbedThumbnailPP, self).__init__(downloader)
+ self._already_have_thumbnail = already_have_thumbnail
+
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ if not info.get('thumbnails'):
+ self._downloader.to_screen('[embedthumbnail] There aren\'t any thumbnails to embed')
+ return [], info
+
+ thumbnail_filename = info['thumbnails'][-1]['filename']
+
+ if not os.path.exists(encodeFilename(thumbnail_filename)):
+ self._downloader.report_warning(
+ 'Skipping embedding the thumbnail because the file is missing.')
+ return [], info
+
+ # Check for mislabeled webp file
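+        # A RIFF/WebP file carries the ASCII tag "WEBP" at byte offset 8,
+        # i.e. within the first 16 bytes read below.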
+ with open(encodeFilename(thumbnail_filename), "rb") as f:
+ b = f.read(16)
+ if b'\x57\x45\x42\x50' in b: # Binary for WEBP
+ [thumbnail_filename_path, thumbnail_filename_extension] = os.path.splitext(thumbnail_filename)
+            if thumbnail_filename_extension != ".webp":
+ webp_thumbnail_filename = thumbnail_filename_path + ".webp"
+ os.rename(encodeFilename(thumbnail_filename), encodeFilename(webp_thumbnail_filename))
+ thumbnail_filename = webp_thumbnail_filename
+
+ # If not a jpg or png thumbnail, convert it to jpg using ffmpeg
+        if os.path.splitext(thumbnail_filename)[1].lower() not in ('.jpg', '.png'):
+ jpg_thumbnail_filename = os.path.splitext(thumbnail_filename)[0] + ".jpg"
+ jpg_thumbnail_filename = os.path.join(os.path.dirname(jpg_thumbnail_filename), os.path.basename(jpg_thumbnail_filename).replace('%', '_')) # ffmpeg interprets % as image sequence
+
+ self._downloader.to_screen('[ffmpeg] Converting thumbnail "%s" to JPEG' % thumbnail_filename)
+
+ self.run_ffmpeg(thumbnail_filename, jpg_thumbnail_filename, ['-bsf:v', 'mjpeg2jpeg'])
+
+ os.remove(encodeFilename(thumbnail_filename))
+ thumbnail_filename = jpg_thumbnail_filename
+
+ if info['ext'] == 'mp3':
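+            # ffmpeg copies the audio (input 0), attaches the image (input 1)
+            # as a video stream and tags it as front cover art.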
+ options = [
+ '-c', 'copy', '-map', '0', '-map', '1',
+ '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment="Cover (Front)"']
+
+ self._downloader.to_screen('[ffmpeg] Adding thumbnail to "%s"' % filename)
+
+ self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+
+ if not self._already_have_thumbnail:
+ os.remove(encodeFilename(thumbnail_filename))
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ elif info['ext'] in ['m4a', 'mp4']:
+ if not check_executable('AtomicParsley', ['-v']):
+                raise EmbedThumbnailPPError('AtomicParsley was not found. Please install it.')
+
+ cmd = [encodeFilename('AtomicParsley', True),
+ encodeFilename(filename, True),
+ encodeArgument('--artwork'),
+ encodeFilename(thumbnail_filename, True),
+ encodeArgument('-o'),
+ encodeFilename(temp_filename, True)]
+
+ self._downloader.to_screen('[atomicparsley] Adding thumbnail to "%s"' % filename)
+
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] AtomicParsley command line: %s' % shell_quote(cmd))
+
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+
+ if p.returncode != 0:
+ msg = stderr.decode('utf-8', 'replace').strip()
+ raise EmbedThumbnailPPError(msg)
+
+ if not self._already_have_thumbnail:
+ os.remove(encodeFilename(thumbnail_filename))
+            # for formats that don't support thumbnails (like 3gp), AtomicParsley
+            # won't write to the temporary file
+ if b'No changes' in stdout:
+ self._downloader.report_warning('The file format doesn\'t support embedding a thumbnail')
+ else:
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ else:
+ raise EmbedThumbnailPPError('Only mp3 and m4a/mp4 are supported for thumbnail embedding for now.')
+
+ return [], info
diff --git a/youtube_dl/postprocessor/execafterdownload.py b/youtube_dlc/postprocessor/execafterdownload.py
index 64dabe790..64dabe790 100644
--- a/youtube_dl/postprocessor/execafterdownload.py
+++ b/youtube_dlc/postprocessor/execafterdownload.py
diff --git a/youtube_dlc/postprocessor/ffmpeg.py b/youtube_dlc/postprocessor/ffmpeg.py
new file mode 100644
index 000000000..dbc736c50
--- /dev/null
+++ b/youtube_dlc/postprocessor/ffmpeg.py
@@ -0,0 +1,657 @@
+from __future__ import unicode_literals
+
+import io
+import os
+import subprocess
+import time
+import re
+
+
+from .common import AudioConversionError, PostProcessor
+
+from ..utils import (
+ encodeArgument,
+ encodeFilename,
+ get_exe_version,
+ is_outdated_version,
+ PostProcessingError,
+ prepend_extension,
+ shell_quote,
+ subtitles_filename,
+ dfxp2srt,
+ ISO639Utils,
+ replace_extension,
+)
+
+
+EXT_TO_OUT_FORMATS = {
+ 'aac': 'adts',
+ 'flac': 'flac',
+ 'm4a': 'ipod',
+ 'mka': 'matroska',
+ 'mkv': 'matroska',
+ 'mpg': 'mpeg',
+ 'ogv': 'ogg',
+ 'ts': 'mpegts',
+ 'wma': 'asf',
+ 'wmv': 'asf',
+}
+ACODECS = {
+ 'mp3': 'libmp3lame',
+ 'aac': 'aac',
+ 'flac': 'flac',
+ 'm4a': 'aac',
+ 'opus': 'libopus',
+ 'vorbis': 'libvorbis',
+ 'wav': None,
+}
+
+
+class FFmpegPostProcessorError(PostProcessingError):
+ pass
+
+
+class FFmpegPostProcessor(PostProcessor):
+ def __init__(self, downloader=None):
+ PostProcessor.__init__(self, downloader)
+ self._determine_executables()
+
+ def check_version(self):
+ if not self.available:
+ raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')
+
+ required_version = '10-0' if self.basename == 'avconv' else '1.0'
+ if is_outdated_version(
+ self._versions[self.basename], required_version):
+ warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
+ self.basename, self.basename, required_version)
+ if self._downloader:
+ self._downloader.report_warning(warning)
+
+ @staticmethod
+ def get_versions(downloader=None):
+ return FFmpegPostProcessor(downloader)._versions
+
+ def _determine_executables(self):
+ programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
+ prefer_ffmpeg = True
+
+ def get_ffmpeg_version(path):
+ ver = get_exe_version(path, args=['-version'])
+ if ver:
+ regexs = [
+ r'(?:\d+:)?([0-9.]+)-[0-9]+ubuntu[0-9.]+$', # Ubuntu, see [1]
+ r'n([0-9.]+)$', # Arch Linux
+ # 1. http://www.ducea.com/2006/06/17/ubuntu-package-version-naming-explanation/
+ ]
+ for regex in regexs:
+ mobj = re.match(regex, ver)
+ if mobj:
+ ver = mobj.group(1)
+ return ver
+
+ self.basename = None
+ self.probe_basename = None
+
+ self._paths = None
+ self._versions = None
+ if self._downloader:
+ prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', True)
+ location = self._downloader.params.get('ffmpeg_location')
+ if location is not None:
+ if not os.path.exists(location):
+ self._downloader.report_warning(
+ 'ffmpeg-location %s does not exist! '
+ 'Continuing without avconv/ffmpeg.' % (location))
+ self._versions = {}
+ return
+ elif not os.path.isdir(location):
+ basename = os.path.splitext(os.path.basename(location))[0]
+ if basename not in programs:
+ self._downloader.report_warning(
+ 'Cannot identify executable %s, its basename should be one of %s. '
+ 'Continuing without avconv/ffmpeg.' %
+ (location, ', '.join(programs)))
+ self._versions = {}
+ return None
+ location = os.path.dirname(os.path.abspath(location))
+ if basename in ('ffmpeg', 'ffprobe'):
+ prefer_ffmpeg = True
+
+ self._paths = dict(
+ (p, os.path.join(location, p)) for p in programs)
+ self._versions = dict(
+ (p, get_ffmpeg_version(self._paths[p])) for p in programs)
+ if self._versions is None:
+ self._versions = dict(
+ (p, get_ffmpeg_version(p)) for p in programs)
+ self._paths = dict((p, p) for p in programs)
+
+ if prefer_ffmpeg is False:
+ prefs = ('avconv', 'ffmpeg')
+ else:
+ prefs = ('ffmpeg', 'avconv')
+ for p in prefs:
+ if self._versions[p]:
+ self.basename = p
+ break
+
+ if prefer_ffmpeg is False:
+ prefs = ('avprobe', 'ffprobe')
+ else:
+ prefs = ('ffprobe', 'avprobe')
+ for p in prefs:
+ if self._versions[p]:
+ self.probe_basename = p
+ break
+
+ @property
+ def available(self):
+ return self.basename is not None
+
+ @property
+ def executable(self):
+ return self._paths[self.basename]
+
+ @property
+ def probe_available(self):
+ return self.probe_basename is not None
+
+ @property
+ def probe_executable(self):
+ return self._paths[self.probe_basename]
+
+ def get_audio_codec(self, path):
+ if not self.probe_available and not self.available:
+ raise PostProcessingError('ffprobe/avprobe and ffmpeg/avconv not found. Please install one.')
+ try:
+ if self.probe_available:
+ cmd = [
+ encodeFilename(self.probe_executable, True),
+ encodeArgument('-show_streams')]
+ else:
+ cmd = [
+ encodeFilename(self.executable, True),
+ encodeArgument('-i')]
+ cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen(
+ '[debug] %s command line: %s' % (self.basename, shell_quote(cmd)))
+ handle = subprocess.Popen(
+ cmd, stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE, stdin=subprocess.PIPE)
+ stdout_data, stderr_data = handle.communicate()
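+            # ffprobe exits 0 on success, while bare "ffmpeg -i" exits 1
+            # because no output file is specified.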
+ expected_ret = 0 if self.probe_available else 1
+ if handle.wait() != expected_ret:
+ return None
+ except (IOError, OSError):
+ return None
+ output = (stdout_data if self.probe_available else stderr_data).decode('ascii', 'ignore')
+ if self.probe_available:
+ audio_codec = None
+ for line in output.split('\n'):
+ if line.startswith('codec_name='):
+ audio_codec = line.split('=')[1].strip()
+ elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+ return audio_codec
+ else:
+ # Stream #FILE_INDEX:STREAM_INDEX[STREAM_ID](LANGUAGE): CODEC_TYPE: CODEC_NAME
+ mobj = re.search(
+ r'Stream\s*#\d+:\d+(?:\[0x[0-9a-f]+\])?(?:\([a-z]{3}\))?:\s*Audio:\s*([0-9a-z]+)',
+ output)
+ if mobj:
+ return mobj.group(1)
+ return None
+
+ def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
+ self.check_version()
+
+ oldest_mtime = min(
+ os.stat(encodeFilename(path)).st_mtime for path in input_paths)
+
+ opts += self._configuration_args()
+
+ files_cmd = []
+ for path in input_paths:
+ files_cmd.extend([
+ encodeArgument('-i'),
+ encodeFilename(self._ffmpeg_filename_argument(path), True)
+ ])
+ cmd = [encodeFilename(self.executable, True), encodeArgument('-y')]
+ # avconv does not have repeat option
+ if self.basename == 'ffmpeg':
+ cmd += [encodeArgument('-loglevel'), encodeArgument('repeat+info')]
+ cmd += (files_cmd
+ + [encodeArgument(o) for o in opts]
+ + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])
+
+ if self._downloader.params.get('verbose', False):
+ self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ stdout, stderr = p.communicate()
+ if p.returncode != 0:
+ stderr = stderr.decode('utf-8', 'replace')
+ msg = stderr.strip().split('\n')[-1]
+ raise FFmpegPostProcessorError(msg)
+ self.try_utime(out_path, oldest_mtime, oldest_mtime)
+
+ def run_ffmpeg(self, path, out_path, opts):
+ self.run_ffmpeg_multiple_files([path], out_path, opts)
+
+ def _ffmpeg_filename_argument(self, fn):
+ # Always use 'file:' because the filename may contain ':' (ffmpeg
+ # interprets that as a protocol) or can start with '-' (-- is broken in
+ # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
+ # Also leave '-' intact in order not to break streaming to stdout.
+ return 'file:' + fn if fn != '-' else fn
+
+
+class FFmpegExtractAudioPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
+ FFmpegPostProcessor.__init__(self, downloader)
+ if preferredcodec is None:
+ preferredcodec = 'best'
+ self._preferredcodec = preferredcodec
+ self._preferredquality = preferredquality
+ self._nopostoverwrites = nopostoverwrites
+
+ def run_ffmpeg(self, path, out_path, codec, more_opts):
+ if codec is None:
+ acodec_opts = []
+ else:
+ acodec_opts = ['-acodec', codec]
+ opts = ['-vn'] + acodec_opts + more_opts
+ try:
+ FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
+ except FFmpegPostProcessorError as err:
+ raise AudioConversionError(err.msg)
+
+ def run(self, information):
+ path = information['filepath']
+
+ filecodec = self.get_audio_codec(path)
+ if filecodec is None:
+            raise PostProcessingError('unable to obtain file audio codec with ffprobe')
+
+ more_opts = []
+ if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
+ if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
+ # Lossless, but in another container
+ acodec = 'copy'
+ extension = 'm4a'
+ more_opts = ['-bsf:a', 'aac_adtstoasc']
+ elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
+ # Lossless if possible
+ acodec = 'copy'
+ extension = filecodec
+ if filecodec == 'aac':
+ more_opts = ['-f', 'adts']
+ if filecodec == 'vorbis':
+ extension = 'ogg'
+ else:
+ # MP3 otherwise.
+ acodec = 'libmp3lame'
+ extension = 'mp3'
+ more_opts = []
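+ # a quality value below 10 is treated as a VBR level (-q:a);
+ # larger values are treated as an average bitrate in kbit/s (-b:a)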
+ if self._preferredquality is not None:
+ if int(self._preferredquality) < 10:
+ more_opts += ['-q:a', self._preferredquality]
+ else:
+ more_opts += ['-b:a', self._preferredquality + 'k']
+ else:
+ # We convert the audio (lossy if codec is lossy)
+ acodec = ACODECS[self._preferredcodec]
+ extension = self._preferredcodec
+ more_opts = []
+ if self._preferredquality is not None:
+ # The opus codec doesn't support the -aq option
+ if int(self._preferredquality) < 10 and extension != 'opus':
+ more_opts += ['-q:a', self._preferredquality]
+ else:
+ more_opts += ['-b:a', self._preferredquality + 'k']
+ if self._preferredcodec == 'aac':
+ more_opts += ['-f', 'adts']
+ if self._preferredcodec == 'm4a':
+ more_opts += ['-bsf:a', 'aac_adtstoasc']
+ if self._preferredcodec == 'vorbis':
+ extension = 'ogg'
+ if self._preferredcodec == 'wav':
+ extension = 'wav'
+ more_opts += ['-f', 'wav']
+
+ prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups
+ new_path = prefix + sep + extension
+
+ information['filepath'] = new_path
+ information['ext'] = extension
+
+ # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
+ if (new_path == path
+ or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
+ self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
+ return [], information
+
+ try:
+ self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
+ self.run_ffmpeg(path, new_path, acodec, more_opts)
+ except AudioConversionError as e:
+ raise PostProcessingError(
+ 'audio conversion failed: ' + e.msg)
+ except Exception:
+ raise PostProcessingError('error running ' + self.basename)
+
+ # Try to update the date time for extracted audio file.
+ if information.get('filetime') is not None:
+ self.try_utime(
+ new_path, time.time(), information['filetime'],
+ errnote='Cannot update utime of audio file')
+
+ return [path], information
+
+
+class FFmpegVideoConvertorPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, preferedformat=None):
+ super(FFmpegVideoConvertorPP, self).__init__(downloader)
+ self._preferedformat = preferedformat
+
+ def run(self, information):
+ path = information['filepath']
+ if information['ext'] == self._preferedformat:
+ self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
+ return [], information
+ options = []
+ if self._preferedformat == 'avi':
+ options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
+ prefix, sep, ext = path.rpartition('.')
+ outpath = prefix + sep + self._preferedformat
+ self._downloader.to_screen('[ffmpeg] Converting video from %s to %s, Destination: %s' % (information['ext'], self._preferedformat, outpath))
+ self.run_ffmpeg(path, outpath, options)
+ information['filepath'] = outpath
+ information['format'] = self._preferedformat
+ information['ext'] = self._preferedformat
+ return [path], information
+
+
+class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
+ def run(self, information):
+ if information['ext'] not in ('mp4', 'webm', 'mkv'):
+ self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
+ return [], information
+ subtitles = information.get('requested_subtitles')
+ if not subtitles:
+ self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
+ return [], information
+
+ filename = information['filepath']
+
+ ext = information['ext']
+ sub_langs = []
+ sub_filenames = []
+ webm_vtt_warn = False
+
+ for lang, sub_info in subtitles.items():
+ sub_ext = sub_info['ext']
+ if ext != 'webm' or (ext == 'webm' and sub_ext == 'vtt'):
+ sub_langs.append(lang)
+ sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
+ else:
+ if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
+ webm_vtt_warn = True
+ self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
+
+ if not sub_langs:
+ return [], information
+
+ input_files = [filename] + sub_filenames
+
+ opts = [
+ '-map', '0',
+ '-c', 'copy',
+ # Don't copy the existing subtitles, we may be running the
+ # postprocessor a second time
+ '-map', '-0:s',
+ # Don't copy Apple TV chapters track, bin_data (see #19042, #19024,
+ # https://trac.ffmpeg.org/ticket/6016)
+ '-map', '-0:d',
+ ]
+ if information['ext'] == 'mp4':
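+ # mp4 cannot carry SRT/VTT streams directly; mov_text is the MP4-native subtitle codec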
+ opts += ['-c:s', 'mov_text']
+ for (i, lang) in enumerate(sub_langs):
+ opts.extend(['-map', '%d:0' % (i + 1)])
+ lang_code = ISO639Utils.short2long(lang) or lang
+ opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
+
+ temp_filename = prepend_extension(filename, 'temp')
+ self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
+ self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return sub_filenames, information
+
+
+class FFmpegMetadataPP(FFmpegPostProcessor):
+ def run(self, info):
+ metadata = {}
+
+ def add(meta_list, info_list=None):
+ if not info_list:
+ info_list = meta_list
+ if not isinstance(meta_list, (list, tuple)):
+ meta_list = (meta_list,)
+ if not isinstance(info_list, (list, tuple)):
+ info_list = (info_list,)
+ for info_f in info_list:
+ if info.get(info_f) is not None:
+ for meta_f in meta_list:
+ metadata[meta_f] = info[info_f]
+ break
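+ # e.g. add('artist', ('artist', 'creator')) copies the first of
+ # info['artist'] or info['creator'] that is set into metadata['artist']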
+
+ # See [1-4] for some info on media metadata/metadata supported
+ # by ffmpeg.
+ # 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
+ # 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
+ # 3. https://kodi.wiki/view/Video_file_tagging
+ # 4. http://atomicparsley.sourceforge.net/mpeg-4files.html
+
+ add('title', ('track', 'title'))
+ add('date', 'upload_date')
+ add(('description', 'comment'), 'description')
+ add('purl', 'webpage_url')
+ add('track', 'track_number')
+ add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
+ add('genre')
+ add('album')
+ add('album_artist')
+ add('disc', 'disc_number')
+ add('show', 'series')
+ add('season_number')
+ add('episode_id', ('episode', 'episode_id'))
+ add('episode_sort', 'episode_number')
+
+ if not metadata:
+ self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
+ return [], info
+
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+ in_filenames = [filename]
+ options = []
+
+ if info['ext'] == 'm4a':
+ options.extend(['-vn', '-acodec', 'copy'])
+ else:
+ options.extend(['-c', 'copy'])
+
+ for (name, value) in metadata.items():
+ options.extend(['-metadata', '%s=%s' % (name, value)])
+
+ chapters = info.get('chapters', [])
+ if chapters:
+ metadata_filename = replace_extension(filename, 'meta')
+ with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
+ def ffmpeg_escape(text):
+ return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)
+
+ metadata_file_content = ';FFMETADATA1\n'
+ for chapter in chapters:
+ metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n'
+ metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000)
+ metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000)
+ chapter_title = chapter.get('title')
+ if chapter_title:
+ metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title)
+ f.write(metadata_file_content)
+ in_filenames.append(metadata_filename)
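+ # the FFMETADATA file is input #1, so -map_metadata 1 imports its chapter markers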
+ options.extend(['-map_metadata', '1'])
+
+ self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
+ self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
+ if chapters:
+ os.remove(metadata_filename)
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return [], info
+
+
+class FFmpegMergerPP(FFmpegPostProcessor):
+ def run(self, info):
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
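+ # stream-copy the first input's video track and the second input's audio track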
+ args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
+ self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename)
+ self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args)
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return info['__files_to_merge'], info
+
+ def can_merge(self):
+ # TODO: figure out merge-capable ffmpeg version
+ if self.basename != 'avconv':
+ return True
+
+ required_version = '10-0'
+ if is_outdated_version(
+ self._versions[self.basename], required_version):
+ warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files; '
+ 'youtube-dlc will download the media as a single file. '
+ 'Update %s to version %s or newer to fix this.') % (
+ self.basename, self.basename, required_version)
+ if self._downloader:
+ self._downloader.report_warning(warning)
+ return False
+ return True
+
+
+class FFmpegFixupStretchedPP(FFmpegPostProcessor):
+ def run(self, info):
+ stretched_ratio = info.get('stretched_ratio')
+ if stretched_ratio is None or stretched_ratio == 1:
+ return [], info
+
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
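+ # -aspect only rewrites the container's display aspect ratio, so -c copy avoids a re-encode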
+ options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio]
+ self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return [], info
+
+
+class FFmpegFixupM4aPP(FFmpegPostProcessor):
+ def run(self, info):
+ if info.get('container') != 'm4a_dash':
+ return [], info
+
+ filename = info['filepath']
+ temp_filename = prepend_extension(filename, 'temp')
+
+ options = ['-c', 'copy', '-f', 'mp4']
+ self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+
+ return [], info
+
+
+class FFmpegFixupM3u8PP(FFmpegPostProcessor):
+ def run(self, info):
+ filename = info['filepath']
+ if self.get_audio_codec(filename) == 'aac':
+ temp_filename = prepend_extension(filename, 'temp')
+
+ options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
+ self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % filename)
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ os.remove(encodeFilename(filename))
+ os.rename(encodeFilename(temp_filename), encodeFilename(filename))
+ return [], info
+
+
+class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
+ def __init__(self, downloader=None, format=None):
+ super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
+ self.format = format
+
+ def run(self, info):
+ subs = info.get('requested_subtitles')
+ filename = info['filepath']
+ new_ext = self.format
+ new_format = new_ext
+ if new_format == 'vtt':
+ new_format = 'webvtt'
+ if subs is None:
+ self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
+ return [], info
+ self._downloader.to_screen('[ffmpeg] Converting subtitles')
+ sub_filenames = []
+ for lang, sub in subs.items():
+ ext = sub['ext']
+ if ext == new_ext:
+ self._downloader.to_screen(
+ '[ffmpeg] Subtitle file for %s is already in the requested format' % new_ext)
+ continue
+ old_file = subtitles_filename(filename, lang, ext, info.get('ext'))
+ sub_filenames.append(old_file)
+ new_file = subtitles_filename(filename, lang, new_ext, info.get('ext'))
+
+ if ext in ('dfxp', 'ttml', 'tt'):
+ self._downloader.report_warning(
+ 'You have requested to convert dfxp (TTML) subtitles into another format, '
+ 'which results in style information loss')
+
+ dfxp_file = old_file
+ srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
+
+ with open(dfxp_file, 'rb') as f:
+ srt_data = dfxp2srt(f.read())
+
+ with io.open(srt_file, 'wt', encoding='utf-8') as f:
+ f.write(srt_data)
+ old_file = srt_file
+
+ subs[lang] = {
+ 'ext': 'srt',
+ 'data': srt_data
+ }
+
+ if new_ext == 'srt':
+ continue
+ else:
+ sub_filenames.append(srt_file)
+
+ self.run_ffmpeg(old_file, new_file, ['-f', new_format])
+
+ with io.open(new_file, 'rt', encoding='utf-8') as f:
+ subs[lang] = {
+ 'ext': new_ext,
+ 'data': f.read(),
+ }
+
+ return sub_filenames, info
diff --git a/youtube_dl/postprocessor/metadatafromtitle.py b/youtube_dlc/postprocessor/metadatafromtitle.py
index f5c14d974..f5c14d974 100644
--- a/youtube_dl/postprocessor/metadatafromtitle.py
+++ b/youtube_dlc/postprocessor/metadatafromtitle.py
diff --git a/youtube_dl/postprocessor/xattrpp.py b/youtube_dlc/postprocessor/xattrpp.py
index 814dabecf..814dabecf 100644
--- a/youtube_dl/postprocessor/xattrpp.py
+++ b/youtube_dlc/postprocessor/xattrpp.py
diff --git a/youtube_dl/socks.py b/youtube_dlc/socks.py
index 5d4adbe72..5d4adbe72 100644
--- a/youtube_dl/socks.py
+++ b/youtube_dlc/socks.py
diff --git a/youtube_dl/swfinterp.py b/youtube_dlc/swfinterp.py
index 0c7158575..0c7158575 100644
--- a/youtube_dl/swfinterp.py
+++ b/youtube_dlc/swfinterp.py
diff --git a/youtube_dlc/update.py b/youtube_dlc/update.py
new file mode 100644
index 000000000..d95a07c0c
--- /dev/null
+++ b/youtube_dlc/update.py
@@ -0,0 +1,190 @@
+from __future__ import unicode_literals
+
+import io
+import json
+import traceback
+import hashlib
+import os
+import subprocess
+import sys
+from zipimport import zipimporter
+
+from .compat import compat_realpath
+from .utils import encode_compat_str
+
+from .version import __version__
+
+
+def rsa_verify(message, signature, key):
+ from hashlib import sha256
+ assert isinstance(message, bytes)
+ byte_size = (len(bin(key[0])) - 2 + 8 - 1) // 8
+ signature = ('%x' % pow(int(signature, 16), key[1], key[0])).encode()
+ signature = (byte_size * 2 - len(signature)) * b'0' + signature
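+ # hex-encoded ASN.1 DigestInfo prefix for SHA-256 (PKCS#1 v1.5 signature padding)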
+ asn1 = b'3031300d060960864801650304020105000420'
+ asn1 += sha256(message).hexdigest().encode()
+ if byte_size < len(asn1) // 2 + 11:
+ return False
+ expected = b'0001' + (byte_size - len(asn1) // 2 - 3) * b'ff' + b'00' + asn1
+ return expected == signature
+
+
+def update_self(to_screen, verbose, opener):
+ """Update the program file with the latest version from the repository"""
+
+ UPDATE_URL = 'https://yt-dl.org/update/'
+ VERSION_URL = UPDATE_URL + 'LATEST_VERSION'
+ JSON_URL = UPDATE_URL + 'versions.json'
+ UPDATES_RSA_KEY = (0x9d60ee4d8f805312fdb15a62f87b95bd66177b91df176765d13514a0f1754bcd2057295c5b6f1d35daa6742c3ffc9a82d3e118861c207995a8031e151d863c9927e304576bc80692bc8e094896fcf11b66f3e29e04e3a71e9a11558558acea1840aec37fc396fb6b65dc81a1c4144e03bd1c011de62e3f1357b327d08426fe93, 65537)
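+ # (modulus, public exponent) pair consumed by rsa_verify above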
+
+ if not isinstance(globals().get('__loader__'), zipimporter) and not hasattr(sys, 'frozen'):
+ to_screen('It looks like you installed youtube-dlc with a package manager, pip, setup.py or a tarball. Please use that to update.')
+ return
+
+ # Check if there is a new version
+ try:
+ newversion = opener.open(VERSION_URL).read().decode('utf-8').strip()
+ except Exception:
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: can\'t find out the latest version. Please try again later.')
+ return
+ if newversion == __version__:
+ to_screen('youtube-dlc is up-to-date (' + __version__ + ')')
+ return
+
+ # Download and check versions info
+ try:
+ versions_info = opener.open(JSON_URL).read().decode('utf-8')
+ versions_info = json.loads(versions_info)
+ except Exception:
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: can\'t obtain versions info. Please try again later.')
+ return
+ if 'signature' not in versions_info:
+ to_screen('ERROR: the versions file is not signed or corrupted. Aborting.')
+ return
+ signature = versions_info['signature']
+ del versions_info['signature']
+ if not rsa_verify(json.dumps(versions_info, sort_keys=True).encode('utf-8'), signature, UPDATES_RSA_KEY):
+ to_screen('ERROR: the versions file signature is invalid. Aborting.')
+ return
+
+ version_id = versions_info['latest']
+
+ def version_tuple(version_str):
+ return tuple(map(int, version_str.split('.')))
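+ # e.g. (illustrative) version_tuple('2020.01.01') == (2020, 1, 1)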
+ if version_tuple(__version__) >= version_tuple(version_id):
+ to_screen('youtube-dlc is up to date (%s)' % __version__)
+ return
+
+ to_screen('Updating to version ' + version_id + ' ...')
+ version = versions_info['versions'][version_id]
+
+ print_notes(to_screen, versions_info['versions'])
+
+ # sys.executable is set to the full pathname of the exe-file for py2exe
+ # though symlinks are not followed, so we need to resolve them manually
+ # with the help of realpath
+ filename = compat_realpath(sys.executable if hasattr(sys, 'frozen') else sys.argv[0])
+
+ if not os.access(filename, os.W_OK):
+ to_screen('ERROR: no write permissions on %s' % filename)
+ return
+
+ # Py2EXE
+ if hasattr(sys, 'frozen'):
+ exe = filename
+ directory = os.path.dirname(exe)
+ if not os.access(directory, os.W_OK):
+ to_screen('ERROR: no write permissions on %s' % directory)
+ return
+
+ try:
+ urlh = opener.open(version['exe'][0])
+ newcontent = urlh.read()
+ urlh.close()
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to download latest version')
+ return
+
+ newcontent_hash = hashlib.sha256(newcontent).hexdigest()
+ if newcontent_hash != version['exe'][1]:
+ to_screen('ERROR: the downloaded file hash does not match. Aborting.')
+ return
+
+ try:
+ with open(exe + '.new', 'wb') as outf:
+ outf.write(newcontent)
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to write the new version')
+ return
+
+ try:
+ bat = os.path.join(directory, 'youtube-dlc-updater.bat')
+ with io.open(bat, 'w') as batfile:
+ batfile.write('''
+@echo off
+echo Waiting for file handle to be closed ...
+ping 127.0.0.1 -n 5 -w 1000 > NUL
+move /Y "%s.new" "%s" > NUL
+echo Updated youtube-dlc to version %s.
+start /b "" cmd /c del "%%~f0"&exit /b"
+ \n''' % (exe, exe, version_id))
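+ # the ping to 127.0.0.1 is the usual batch-file sleep (~4 s), giving the
+ # old exe time to exit and release its file handle before the move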
+
+ subprocess.Popen([bat]) # Continues to run in the background
+ return # Do not show premature success messages
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to overwrite current version')
+ return
+
+ # Zip unix package
+ elif isinstance(globals().get('__loader__'), zipimporter):
+ try:
+ urlh = opener.open(version['bin'][0])
+ newcontent = urlh.read()
+ urlh.close()
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to download latest version')
+ return
+
+ newcontent_hash = hashlib.sha256(newcontent).hexdigest()
+ if newcontent_hash != version['bin'][1]:
+ to_screen('ERROR: the downloaded file hash does not match. Aborting.')
+ return
+
+ try:
+ with open(filename, 'wb') as outf:
+ outf.write(newcontent)
+ except (IOError, OSError):
+ if verbose:
+ to_screen(encode_compat_str(traceback.format_exc()))
+ to_screen('ERROR: unable to overwrite current version')
+ return
+
+ to_screen('Updated youtube-dlc. Restart youtube-dlc to use the new version.')
+
+
+def get_notes(versions, fromVersion):
+ notes = []
+ for v, vdata in sorted(versions.items()):
+ if v > fromVersion:
+ notes.extend(vdata.get('notes', []))
+ return notes
+
+
+def print_notes(to_screen, versions, fromVersion=__version__):
+ notes = get_notes(versions, fromVersion)
+ if notes:
+ to_screen('PLEASE NOTE:')
+ for note in notes:
+ to_screen(note)
diff --git a/youtube_dlc/utils.py b/youtube_dlc/utils.py
new file mode 100644
index 000000000..7dafacac2
--- /dev/null
+++ b/youtube_dlc/utils.py
@@ -0,0 +1,5707 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import base64
+import binascii
+import calendar
+import codecs
+import collections
+import contextlib
+import ctypes
+import datetime
+import email.utils
+import email.header
+import errno
+import functools
+import gzip
+import io
+import itertools
+import json
+import locale
+import math
+import operator
+import os
+import platform
+import random
+import re
+import socket
+import ssl
+import subprocess
+import sys
+import tempfile
+import time
+import traceback
+import xml.etree.ElementTree
+import zlib
+
+from .compat import (
+ compat_HTMLParseError,
+ compat_HTMLParser,
+ compat_basestring,
+ compat_chr,
+ compat_cookiejar,
+ compat_ctypes_WINFUNCTYPE,
+ compat_etree_fromstring,
+ compat_expanduser,
+ compat_html_entities,
+ compat_html_entities_html5,
+ compat_http_client,
+ compat_integer_types,
+ compat_kwargs,
+ compat_os_name,
+ compat_parse_qs,
+ compat_shlex_quote,
+ compat_str,
+ compat_struct_pack,
+ compat_struct_unpack,
+ compat_urllib_error,
+ compat_urllib_parse,
+ compat_urllib_parse_urlencode,
+ compat_urllib_parse_urlparse,
+ compat_urllib_parse_unquote_plus,
+ compat_urllib_request,
+ compat_urlparse,
+ compat_xpath,
+)
+
+from .socks import (
+ ProxyType,
+ sockssocket,
+)
+
+
+def register_socks_protocols():
+ # "Register" SOCKS protocols
+ # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
+ # URLs with protocols not in urlparse.uses_netloc are not handled correctly
+ for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
+ if scheme not in compat_urlparse.uses_netloc:
+ compat_urlparse.uses_netloc.append(scheme)
+
+
+# This is not clearly defined otherwise
+compiled_regex_type = type(re.compile(''))
+
+
+def random_user_agent():
+ _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
+ _CHROME_VERSIONS = (
+ '74.0.3729.129',
+ '76.0.3780.3',
+ '76.0.3780.2',
+ '74.0.3729.128',
+ '76.0.3780.1',
+ '76.0.3780.0',
+ '75.0.3770.15',
+ '74.0.3729.127',
+ '74.0.3729.126',
+ '76.0.3779.1',
+ '76.0.3779.0',
+ '75.0.3770.14',
+ '74.0.3729.125',
+ '76.0.3778.1',
+ '76.0.3778.0',
+ '75.0.3770.13',
+ '74.0.3729.124',
+ '74.0.3729.123',
+ '73.0.3683.121',
+ '76.0.3777.1',
+ '76.0.3777.0',
+ '75.0.3770.12',
+ '74.0.3729.122',
+ '76.0.3776.4',
+ '75.0.3770.11',
+ '74.0.3729.121',
+ '76.0.3776.3',
+ '76.0.3776.2',
+ '73.0.3683.120',
+ '74.0.3729.120',
+ '74.0.3729.119',
+ '74.0.3729.118',
+ '76.0.3776.1',
+ '76.0.3776.0',
+ '76.0.3775.5',
+ '75.0.3770.10',
+ '74.0.3729.117',
+ '76.0.3775.4',
+ '76.0.3775.3',
+ '74.0.3729.116',
+ '75.0.3770.9',
+ '76.0.3775.2',
+ '76.0.3775.1',
+ '76.0.3775.0',
+ '75.0.3770.8',
+ '74.0.3729.115',
+ '74.0.3729.114',
+ '76.0.3774.1',
+ '76.0.3774.0',
+ '75.0.3770.7',
+ '74.0.3729.113',
+ '74.0.3729.112',
+ '74.0.3729.111',
+ '76.0.3773.1',
+ '76.0.3773.0',
+ '75.0.3770.6',
+ '74.0.3729.110',
+ '74.0.3729.109',
+ '76.0.3772.1',
+ '76.0.3772.0',
+ '75.0.3770.5',
+ '74.0.3729.108',
+ '74.0.3729.107',
+ '76.0.3771.1',
+ '76.0.3771.0',
+ '75.0.3770.4',
+ '74.0.3729.106',
+ '74.0.3729.105',
+ '75.0.3770.3',
+ '74.0.3729.104',
+ '74.0.3729.103',
+ '74.0.3729.102',
+ '75.0.3770.2',
+ '74.0.3729.101',
+ '75.0.3770.1',
+ '75.0.3770.0',
+ '74.0.3729.100',
+ '75.0.3769.5',
+ '75.0.3769.4',
+ '74.0.3729.99',
+ '75.0.3769.3',
+ '75.0.3769.2',
+ '75.0.3768.6',
+ '74.0.3729.98',
+ '75.0.3769.1',
+ '75.0.3769.0',
+ '74.0.3729.97',
+ '73.0.3683.119',
+ '73.0.3683.118',
+ '74.0.3729.96',
+ '75.0.3768.5',
+ '75.0.3768.4',
+ '75.0.3768.3',
+ '75.0.3768.2',
+ '74.0.3729.95',
+ '74.0.3729.94',
+ '75.0.3768.1',
+ '75.0.3768.0',
+ '74.0.3729.93',
+ '74.0.3729.92',
+ '73.0.3683.117',
+ '74.0.3729.91',
+ '75.0.3766.3',
+ '74.0.3729.90',
+ '75.0.3767.2',
+ '75.0.3767.1',
+ '75.0.3767.0',
+ '74.0.3729.89',
+ '73.0.3683.116',
+ '75.0.3766.2',
+ '74.0.3729.88',
+ '75.0.3766.1',
+ '75.0.3766.0',
+ '74.0.3729.87',
+ '73.0.3683.115',
+ '74.0.3729.86',
+ '75.0.3765.1',
+ '75.0.3765.0',
+ '74.0.3729.85',
+ '73.0.3683.114',
+ '74.0.3729.84',
+ '75.0.3764.1',
+ '75.0.3764.0',
+ '74.0.3729.83',
+ '73.0.3683.113',
+ '75.0.3763.2',
+ '75.0.3761.4',
+ '74.0.3729.82',
+ '75.0.3763.1',
+ '75.0.3763.0',
+ '74.0.3729.81',
+ '73.0.3683.112',
+ '75.0.3762.1',
+ '75.0.3762.0',
+ '74.0.3729.80',
+ '75.0.3761.3',
+ '74.0.3729.79',
+ '73.0.3683.111',
+ '75.0.3761.2',
+ '74.0.3729.78',
+ '74.0.3729.77',
+ '75.0.3761.1',
+ '75.0.3761.0',
+ '73.0.3683.110',
+ '74.0.3729.76',
+ '74.0.3729.75',
+ '75.0.3760.0',
+ '74.0.3729.74',
+ '75.0.3759.8',
+ '75.0.3759.7',
+ '75.0.3759.6',
+ '74.0.3729.73',
+ '75.0.3759.5',
+ '74.0.3729.72',
+ '73.0.3683.109',
+ '75.0.3759.4',
+ '75.0.3759.3',
+ '74.0.3729.71',
+ '75.0.3759.2',
+ '74.0.3729.70',
+ '73.0.3683.108',
+ '74.0.3729.69',
+ '75.0.3759.1',
+ '75.0.3759.0',
+ '74.0.3729.68',
+ '73.0.3683.107',
+ '74.0.3729.67',
+ '75.0.3758.1',
+ '75.0.3758.0',
+ '74.0.3729.66',
+ '73.0.3683.106',
+ '74.0.3729.65',
+ '75.0.3757.1',
+ '75.0.3757.0',
+ '74.0.3729.64',
+ '73.0.3683.105',
+ '74.0.3729.63',
+ '75.0.3756.1',
+ '75.0.3756.0',
+ '74.0.3729.62',
+ '73.0.3683.104',
+ '75.0.3755.3',
+ '75.0.3755.2',
+ '73.0.3683.103',
+ '75.0.3755.1',
+ '75.0.3755.0',
+ '74.0.3729.61',
+ '73.0.3683.102',
+ '74.0.3729.60',
+ '75.0.3754.2',
+ '74.0.3729.59',
+ '75.0.3753.4',
+ '74.0.3729.58',
+ '75.0.3754.1',
+ '75.0.3754.0',
+ '74.0.3729.57',
+ '73.0.3683.101',
+ '75.0.3753.3',
+ '75.0.3752.2',
+ '75.0.3753.2',
+ '74.0.3729.56',
+ '75.0.3753.1',
+ '75.0.3753.0',
+ '74.0.3729.55',
+ '73.0.3683.100',
+ '74.0.3729.54',
+ '75.0.3752.1',
+ '75.0.3752.0',
+ '74.0.3729.53',
+ '73.0.3683.99',
+ '74.0.3729.52',
+ '75.0.3751.1',
+ '75.0.3751.0',
+ '74.0.3729.51',
+ '73.0.3683.98',
+ '74.0.3729.50',
+ '75.0.3750.0',
+ '74.0.3729.49',
+ '74.0.3729.48',
+ '74.0.3729.47',
+ '75.0.3749.3',
+ '74.0.3729.46',
+ '73.0.3683.97',
+ '75.0.3749.2',
+ '74.0.3729.45',
+ '75.0.3749.1',
+ '75.0.3749.0',
+ '74.0.3729.44',
+ '73.0.3683.96',
+ '74.0.3729.43',
+ '74.0.3729.42',
+ '75.0.3748.1',
+ '75.0.3748.0',
+ '74.0.3729.41',
+ '75.0.3747.1',
+ '73.0.3683.95',
+ '75.0.3746.4',
+ '74.0.3729.40',
+ '74.0.3729.39',
+ '75.0.3747.0',
+ '75.0.3746.3',
+ '75.0.3746.2',
+ '74.0.3729.38',
+ '75.0.3746.1',
+ '75.0.3746.0',
+ '74.0.3729.37',
+ '73.0.3683.94',
+ '75.0.3745.5',
+ '75.0.3745.4',
+ '75.0.3745.3',
+ '75.0.3745.2',
+ '74.0.3729.36',
+ '75.0.3745.1',
+ '75.0.3745.0',
+ '75.0.3744.2',
+ '74.0.3729.35',
+ '73.0.3683.93',
+ '74.0.3729.34',
+ '75.0.3744.1',
+ '75.0.3744.0',
+ '74.0.3729.33',
+ '73.0.3683.92',
+ '74.0.3729.32',
+ '74.0.3729.31',
+ '73.0.3683.91',
+ '75.0.3741.2',
+ '75.0.3740.5',
+ '74.0.3729.30',
+ '75.0.3741.1',
+ '75.0.3741.0',
+ '74.0.3729.29',
+ '75.0.3740.4',
+ '73.0.3683.90',
+ '74.0.3729.28',
+ '75.0.3740.3',
+ '73.0.3683.89',
+ '75.0.3740.2',
+ '74.0.3729.27',
+ '75.0.3740.1',
+ '75.0.3740.0',
+ '74.0.3729.26',
+ '73.0.3683.88',
+ '73.0.3683.87',
+ '74.0.3729.25',
+ '75.0.3739.1',
+ '75.0.3739.0',
+ '73.0.3683.86',
+ '74.0.3729.24',
+ '73.0.3683.85',
+ '75.0.3738.4',
+ '75.0.3738.3',
+ '75.0.3738.2',
+ '75.0.3738.1',
+ '75.0.3738.0',
+ '74.0.3729.23',
+ '73.0.3683.84',
+ '74.0.3729.22',
+ '74.0.3729.21',
+ '75.0.3737.1',
+ '75.0.3737.0',
+ '74.0.3729.20',
+ '73.0.3683.83',
+ '74.0.3729.19',
+ '75.0.3736.1',
+ '75.0.3736.0',
+ '74.0.3729.18',
+ '73.0.3683.82',
+ '74.0.3729.17',
+ '75.0.3735.1',
+ '75.0.3735.0',
+ '74.0.3729.16',
+ '73.0.3683.81',
+ '75.0.3734.1',
+ '75.0.3734.0',
+ '74.0.3729.15',
+ '73.0.3683.80',
+ '74.0.3729.14',
+ '75.0.3733.1',
+ '75.0.3733.0',
+ '75.0.3732.1',
+ '74.0.3729.13',
+ '74.0.3729.12',
+ '73.0.3683.79',
+ '74.0.3729.11',
+ '75.0.3732.0',
+ '74.0.3729.10',
+ '73.0.3683.78',
+ '74.0.3729.9',
+ '74.0.3729.8',
+ '74.0.3729.7',
+ '75.0.3731.3',
+ '75.0.3731.2',
+ '75.0.3731.0',
+ '74.0.3729.6',
+ '73.0.3683.77',
+ '73.0.3683.76',
+ '75.0.3730.5',
+ '75.0.3730.4',
+ '73.0.3683.75',
+ '74.0.3729.5',
+ '73.0.3683.74',
+ '75.0.3730.3',
+ '75.0.3730.2',
+ '74.0.3729.4',
+ '73.0.3683.73',
+ '73.0.3683.72',
+ '75.0.3730.1',
+ '75.0.3730.0',
+ '74.0.3729.3',
+ '73.0.3683.71',
+ '74.0.3729.2',
+ '73.0.3683.70',
+ '74.0.3729.1',
+ '74.0.3729.0',
+ '74.0.3726.4',
+ '73.0.3683.69',
+ '74.0.3726.3',
+ '74.0.3728.0',
+ '74.0.3726.2',
+ '73.0.3683.68',
+ '74.0.3726.1',
+ '74.0.3726.0',
+ '74.0.3725.4',
+ '73.0.3683.67',
+ '73.0.3683.66',
+ '74.0.3725.3',
+ '74.0.3725.2',
+ '74.0.3725.1',
+ '74.0.3724.8',
+ '74.0.3725.0',
+ '73.0.3683.65',
+ '74.0.3724.7',
+ '74.0.3724.6',
+ '74.0.3724.5',
+ '74.0.3724.4',
+ '74.0.3724.3',
+ '74.0.3724.2',
+ '74.0.3724.1',
+ '74.0.3724.0',
+ '73.0.3683.64',
+ '74.0.3723.1',
+ '74.0.3723.0',
+ '73.0.3683.63',
+ '74.0.3722.1',
+ '74.0.3722.0',
+ '73.0.3683.62',
+ '74.0.3718.9',
+ '74.0.3702.3',
+ '74.0.3721.3',
+ '74.0.3721.2',
+ '74.0.3721.1',
+ '74.0.3721.0',
+ '74.0.3720.6',
+ '73.0.3683.61',
+ '72.0.3626.122',
+ '73.0.3683.60',
+ '74.0.3720.5',
+ '72.0.3626.121',
+ '74.0.3718.8',
+ '74.0.3720.4',
+ '74.0.3720.3',
+ '74.0.3718.7',
+ '74.0.3720.2',
+ '74.0.3720.1',
+ '74.0.3720.0',
+ '74.0.3718.6',
+ '74.0.3719.5',
+ '73.0.3683.59',
+ '74.0.3718.5',
+ '74.0.3718.4',
+ '74.0.3719.4',
+ '74.0.3719.3',
+ '74.0.3719.2',
+ '74.0.3719.1',
+ '73.0.3683.58',
+ '74.0.3719.0',
+ '73.0.3683.57',
+ '73.0.3683.56',
+ '74.0.3718.3',
+ '73.0.3683.55',
+ '74.0.3718.2',
+ '74.0.3718.1',
+ '74.0.3718.0',
+ '73.0.3683.54',
+ '74.0.3717.2',
+ '73.0.3683.53',
+ '74.0.3717.1',
+ '74.0.3717.0',
+ '73.0.3683.52',
+ '74.0.3716.1',
+ '74.0.3716.0',
+ '73.0.3683.51',
+ '74.0.3715.1',
+ '74.0.3715.0',
+ '73.0.3683.50',
+ '74.0.3711.2',
+ '74.0.3714.2',
+ '74.0.3713.3',
+ '74.0.3714.1',
+ '74.0.3714.0',
+ '73.0.3683.49',
+ '74.0.3713.1',
+ '74.0.3713.0',
+ '72.0.3626.120',
+ '73.0.3683.48',
+ '74.0.3712.2',
+ '74.0.3712.1',
+ '74.0.3712.0',
+ '73.0.3683.47',
+ '72.0.3626.119',
+ '73.0.3683.46',
+ '74.0.3710.2',
+ '72.0.3626.118',
+ '74.0.3711.1',
+ '74.0.3711.0',
+ '73.0.3683.45',
+ '72.0.3626.117',
+ '74.0.3710.1',
+ '74.0.3710.0',
+ '73.0.3683.44',
+ '72.0.3626.116',
+ '74.0.3709.1',
+ '74.0.3709.0',
+ '74.0.3704.9',
+ '73.0.3683.43',
+ '72.0.3626.115',
+ '74.0.3704.8',
+ '74.0.3704.7',
+ '74.0.3708.0',
+ '74.0.3706.7',
+ '74.0.3704.6',
+ '73.0.3683.42',
+ '72.0.3626.114',
+ '74.0.3706.6',
+ '72.0.3626.113',
+ '74.0.3704.5',
+ '74.0.3706.5',
+ '74.0.3706.4',
+ '74.0.3706.3',
+ '74.0.3706.2',
+ '74.0.3706.1',
+ '74.0.3706.0',
+ '73.0.3683.41',
+ '72.0.3626.112',
+ '74.0.3705.1',
+ '74.0.3705.0',
+ '73.0.3683.40',
+ '72.0.3626.111',
+ '73.0.3683.39',
+ '74.0.3704.4',
+ '73.0.3683.38',
+ '74.0.3704.3',
+ '74.0.3704.2',
+ '74.0.3704.1',
+ '74.0.3704.0',
+ '73.0.3683.37',
+ '72.0.3626.110',
+ '72.0.3626.109',
+ '74.0.3703.3',
+ '74.0.3703.2',
+ '73.0.3683.36',
+ '74.0.3703.1',
+ '74.0.3703.0',
+ '73.0.3683.35',
+ '72.0.3626.108',
+ '74.0.3702.2',
+ '74.0.3699.3',
+ '74.0.3702.1',
+ '74.0.3702.0',
+ '73.0.3683.34',
+ '72.0.3626.107',
+ '73.0.3683.33',
+ '74.0.3701.1',
+ '74.0.3701.0',
+ '73.0.3683.32',
+ '73.0.3683.31',
+ '72.0.3626.105',
+ '74.0.3700.1',
+ '74.0.3700.0',
+ '73.0.3683.29',
+ '72.0.3626.103',
+ '74.0.3699.2',
+ '74.0.3699.1',
+ '74.0.3699.0',
+ '73.0.3683.28',
+ '72.0.3626.102',
+ '73.0.3683.27',
+ '73.0.3683.26',
+ '74.0.3698.0',
+ '74.0.3696.2',
+ '72.0.3626.101',
+ '73.0.3683.25',
+ '74.0.3696.1',
+ '74.0.3696.0',
+ '74.0.3694.8',
+ '72.0.3626.100',
+ '74.0.3694.7',
+ '74.0.3694.6',
+ '74.0.3694.5',
+ '74.0.3694.4',
+ '72.0.3626.99',
+ '72.0.3626.98',
+ '74.0.3694.3',
+ '73.0.3683.24',
+ '72.0.3626.97',
+ '72.0.3626.96',
+ '72.0.3626.95',
+ '73.0.3683.23',
+ '72.0.3626.94',
+ '73.0.3683.22',
+ '73.0.3683.21',
+ '72.0.3626.93',
+ '74.0.3694.2',
+ '72.0.3626.92',
+ '74.0.3694.1',
+ '74.0.3694.0',
+ '74.0.3693.6',
+ '73.0.3683.20',
+ '72.0.3626.91',
+ '74.0.3693.5',
+ '74.0.3693.4',
+ '74.0.3693.3',
+ '74.0.3693.2',
+ '73.0.3683.19',
+ '74.0.3693.1',
+ '74.0.3693.0',
+ '73.0.3683.18',
+ '72.0.3626.90',
+ '74.0.3692.1',
+ '74.0.3692.0',
+ '73.0.3683.17',
+ '72.0.3626.89',
+ '74.0.3687.3',
+ '74.0.3691.1',
+ '74.0.3691.0',
+ '73.0.3683.16',
+ '72.0.3626.88',
+ '72.0.3626.87',
+ '73.0.3683.15',
+ '74.0.3690.1',
+ '74.0.3690.0',
+ '73.0.3683.14',
+ '72.0.3626.86',
+ '73.0.3683.13',
+ '73.0.3683.12',
+ '74.0.3689.1',
+ '74.0.3689.0',
+ '73.0.3683.11',
+ '72.0.3626.85',
+ '73.0.3683.10',
+ '72.0.3626.84',
+ '73.0.3683.9',
+ '74.0.3688.1',
+ '74.0.3688.0',
+ '73.0.3683.8',
+ '72.0.3626.83',
+ '74.0.3687.2',
+ '74.0.3687.1',
+ '74.0.3687.0',
+ '73.0.3683.7',
+ '72.0.3626.82',
+ '74.0.3686.4',
+ '72.0.3626.81',
+ '74.0.3686.3',
+ '74.0.3686.2',
+ '74.0.3686.1',
+ '74.0.3686.0',
+ '73.0.3683.6',
+ '72.0.3626.80',
+ '74.0.3685.1',
+ '74.0.3685.0',
+ '73.0.3683.5',
+ '72.0.3626.79',
+ '74.0.3684.1',
+ '74.0.3684.0',
+ '73.0.3683.4',
+ '72.0.3626.78',
+ '72.0.3626.77',
+ '73.0.3683.3',
+ '73.0.3683.2',
+ '72.0.3626.76',
+ '73.0.3683.1',
+ '73.0.3683.0',
+ '72.0.3626.75',
+ '71.0.3578.141',
+ '73.0.3682.1',
+ '73.0.3682.0',
+ '72.0.3626.74',
+ '71.0.3578.140',
+ '73.0.3681.4',
+ '73.0.3681.3',
+ '73.0.3681.2',
+ '73.0.3681.1',
+ '73.0.3681.0',
+ '72.0.3626.73',
+ '71.0.3578.139',
+ '72.0.3626.72',
+ '72.0.3626.71',
+ '73.0.3680.1',
+ '73.0.3680.0',
+ '72.0.3626.70',
+ '71.0.3578.138',
+ '73.0.3678.2',
+ '73.0.3679.1',
+ '73.0.3679.0',
+ '72.0.3626.69',
+ '71.0.3578.137',
+ '73.0.3678.1',
+ '73.0.3678.0',
+ '71.0.3578.136',
+ '73.0.3677.1',
+ '73.0.3677.0',
+ '72.0.3626.68',
+ '72.0.3626.67',
+ '71.0.3578.135',
+ '73.0.3676.1',
+ '73.0.3676.0',
+ '73.0.3674.2',
+ '72.0.3626.66',
+ '71.0.3578.134',
+ '73.0.3674.1',
+ '73.0.3674.0',
+ '72.0.3626.65',
+ '71.0.3578.133',
+ '73.0.3673.2',
+ '73.0.3673.1',
+ '73.0.3673.0',
+ '72.0.3626.64',
+ '71.0.3578.132',
+ '72.0.3626.63',
+ '72.0.3626.62',
+ '72.0.3626.61',
+ '72.0.3626.60',
+ '73.0.3672.1',
+ '73.0.3672.0',
+ '72.0.3626.59',
+ '71.0.3578.131',
+ '73.0.3671.3',
+ '73.0.3671.2',
+ '73.0.3671.1',
+ '73.0.3671.0',
+ '72.0.3626.58',
+ '71.0.3578.130',
+ '73.0.3670.1',
+ '73.0.3670.0',
+ '72.0.3626.57',
+ '71.0.3578.129',
+ '73.0.3669.1',
+ '73.0.3669.0',
+ '72.0.3626.56',
+ '71.0.3578.128',
+ '73.0.3668.2',
+ '73.0.3668.1',
+ '73.0.3668.0',
+ '72.0.3626.55',
+ '71.0.3578.127',
+ '73.0.3667.2',
+ '73.0.3667.1',
+ '73.0.3667.0',
+ '72.0.3626.54',
+ '71.0.3578.126',
+ '73.0.3666.1',
+ '73.0.3666.0',
+ '72.0.3626.53',
+ '71.0.3578.125',
+ '73.0.3665.4',
+ '73.0.3665.3',
+ '72.0.3626.52',
+ '73.0.3665.2',
+ '73.0.3664.4',
+ '73.0.3665.1',
+ '73.0.3665.0',
+ '72.0.3626.51',
+ '71.0.3578.124',
+ '72.0.3626.50',
+ '73.0.3664.3',
+ '73.0.3664.2',
+ '73.0.3664.1',
+ '73.0.3664.0',
+ '73.0.3663.2',
+ '72.0.3626.49',
+ '71.0.3578.123',
+ '73.0.3663.1',
+ '73.0.3663.0',
+ '72.0.3626.48',
+ '71.0.3578.122',
+ '73.0.3662.1',
+ '73.0.3662.0',
+ '72.0.3626.47',
+ '71.0.3578.121',
+ '73.0.3661.1',
+ '72.0.3626.46',
+ '73.0.3661.0',
+ '72.0.3626.45',
+ '71.0.3578.120',
+ '73.0.3660.2',
+ '73.0.3660.1',
+ '73.0.3660.0',
+ '72.0.3626.44',
+ '71.0.3578.119',
+ '73.0.3659.1',
+ '73.0.3659.0',
+ '72.0.3626.43',
+ '71.0.3578.118',
+ '73.0.3658.1',
+ '73.0.3658.0',
+ '72.0.3626.42',
+ '71.0.3578.117',
+ '73.0.3657.1',
+ '73.0.3657.0',
+ '72.0.3626.41',
+ '71.0.3578.116',
+ '73.0.3656.1',
+ '73.0.3656.0',
+ '72.0.3626.40',
+ '71.0.3578.115',
+ '73.0.3655.1',
+ '73.0.3655.0',
+ '72.0.3626.39',
+ '71.0.3578.114',
+ '73.0.3654.1',
+ '73.0.3654.0',
+ '72.0.3626.38',
+ '71.0.3578.113',
+ '73.0.3653.1',
+ '73.0.3653.0',
+ '72.0.3626.37',
+ '71.0.3578.112',
+ '73.0.3652.1',
+ '73.0.3652.0',
+ '72.0.3626.36',
+ '71.0.3578.111',
+ '73.0.3651.1',
+ '73.0.3651.0',
+ '72.0.3626.35',
+ '71.0.3578.110',
+ '73.0.3650.1',
+ '73.0.3650.0',
+ '72.0.3626.34',
+ '71.0.3578.109',
+ '73.0.3649.1',
+ '73.0.3649.0',
+ '72.0.3626.33',
+ '71.0.3578.108',
+ '73.0.3648.2',
+ '73.0.3648.1',
+ '73.0.3648.0',
+ '72.0.3626.32',
+ '71.0.3578.107',
+ '73.0.3647.2',
+ '73.0.3647.1',
+ '73.0.3647.0',
+ '72.0.3626.31',
+ '71.0.3578.106',
+ '73.0.3635.3',
+ '73.0.3646.2',
+ '73.0.3646.1',
+ '73.0.3646.0',
+ '72.0.3626.30',
+ '71.0.3578.105',
+ '72.0.3626.29',
+ '73.0.3645.2',
+ '73.0.3645.1',
+ '73.0.3645.0',
+ '72.0.3626.28',
+ '71.0.3578.104',
+ '72.0.3626.27',
+ '72.0.3626.26',
+ '72.0.3626.25',
+ '72.0.3626.24',
+ '73.0.3644.0',
+ '73.0.3643.2',
+ '72.0.3626.23',
+ '71.0.3578.103',
+ '73.0.3643.1',
+ '73.0.3643.0',
+ '72.0.3626.22',
+ '71.0.3578.102',
+ '73.0.3642.1',
+ '73.0.3642.0',
+ '72.0.3626.21',
+ '71.0.3578.101',
+ '73.0.3641.1',
+ '73.0.3641.0',
+ '72.0.3626.20',
+ '71.0.3578.100',
+ '72.0.3626.19',
+ '73.0.3640.1',
+ '73.0.3640.0',
+ '72.0.3626.18',
+ '73.0.3639.1',
+ '71.0.3578.99',
+ '73.0.3639.0',
+ '72.0.3626.17',
+ '73.0.3638.2',
+ '72.0.3626.16',
+ '73.0.3638.1',
+ '73.0.3638.0',
+ '72.0.3626.15',
+ '71.0.3578.98',
+ '73.0.3635.2',
+ '71.0.3578.97',
+ '73.0.3637.1',
+ '73.0.3637.0',
+ '72.0.3626.14',
+ '71.0.3578.96',
+ '71.0.3578.95',
+ '72.0.3626.13',
+ '71.0.3578.94',
+ '73.0.3636.2',
+ '71.0.3578.93',
+ '73.0.3636.1',
+ '73.0.3636.0',
+ '72.0.3626.12',
+ '71.0.3578.92',
+ '73.0.3635.1',
+ '73.0.3635.0',
+ '72.0.3626.11',
+ '71.0.3578.91',
+ '73.0.3634.2',
+ '73.0.3634.1',
+ '73.0.3634.0',
+ '72.0.3626.10',
+ '71.0.3578.90',
+ '71.0.3578.89',
+ '73.0.3633.2',
+ '73.0.3633.1',
+ '73.0.3633.0',
+ '72.0.3610.4',
+ '72.0.3626.9',
+ '71.0.3578.88',
+ '73.0.3632.5',
+ '73.0.3632.4',
+ '73.0.3632.3',
+ '73.0.3632.2',
+ '73.0.3632.1',
+ '73.0.3632.0',
+ '72.0.3626.8',
+ '71.0.3578.87',
+ '73.0.3631.2',
+ '73.0.3631.1',
+ '73.0.3631.0',
+ '72.0.3626.7',
+ '71.0.3578.86',
+ '72.0.3626.6',
+ '73.0.3630.1',
+ '73.0.3630.0',
+ '72.0.3626.5',
+ '71.0.3578.85',
+ '72.0.3626.4',
+ '73.0.3628.3',
+ '73.0.3628.2',
+ '73.0.3629.1',
+ '73.0.3629.0',
+ '72.0.3626.3',
+ '71.0.3578.84',
+ '73.0.3628.1',
+ '73.0.3628.0',
+ '71.0.3578.83',
+ '73.0.3627.1',
+ '73.0.3627.0',
+ '72.0.3626.2',
+ '71.0.3578.82',
+ '71.0.3578.81',
+ '71.0.3578.80',
+ '72.0.3626.1',
+ '72.0.3626.0',
+ '71.0.3578.79',
+ '70.0.3538.124',
+ '71.0.3578.78',
+ '72.0.3623.4',
+ '72.0.3625.2',
+ '72.0.3625.1',
+ '72.0.3625.0',
+ '71.0.3578.77',
+ '70.0.3538.123',
+ '72.0.3624.4',
+ '72.0.3624.3',
+ '72.0.3624.2',
+ '71.0.3578.76',
+ '72.0.3624.1',
+ '72.0.3624.0',
+ '72.0.3623.3',
+ '71.0.3578.75',
+ '70.0.3538.122',
+ '71.0.3578.74',
+ '72.0.3623.2',
+ '72.0.3610.3',
+ '72.0.3623.1',
+ '72.0.3623.0',
+ '72.0.3622.3',
+ '72.0.3622.2',
+ '71.0.3578.73',
+ '70.0.3538.121',
+ '72.0.3622.1',
+ '72.0.3622.0',
+ '71.0.3578.72',
+ '70.0.3538.120',
+ '72.0.3621.1',
+ '72.0.3621.0',
+ '71.0.3578.71',
+ '70.0.3538.119',
+ '72.0.3620.1',
+ '72.0.3620.0',
+ '71.0.3578.70',
+ '70.0.3538.118',
+ '71.0.3578.69',
+ '72.0.3619.1',
+ '72.0.3619.0',
+ '71.0.3578.68',
+ '70.0.3538.117',
+ '71.0.3578.67',
+ '72.0.3618.1',
+ '72.0.3618.0',
+ '71.0.3578.66',
+ '70.0.3538.116',
+ '72.0.3617.1',
+ '72.0.3617.0',
+ '71.0.3578.65',
+ '70.0.3538.115',
+ '72.0.3602.3',
+ '71.0.3578.64',
+ '72.0.3616.1',
+ '72.0.3616.0',
+ '71.0.3578.63',
+ '70.0.3538.114',
+ '71.0.3578.62',
+ '72.0.3615.1',
+ '72.0.3615.0',
+ '71.0.3578.61',
+ '70.0.3538.113',
+ '72.0.3614.1',
+ '72.0.3614.0',
+ '71.0.3578.60',
+ '70.0.3538.112',
+ '72.0.3613.1',
+ '72.0.3613.0',
+ '71.0.3578.59',
+ '70.0.3538.111',
+ '72.0.3612.2',
+ '72.0.3612.1',
+ '72.0.3612.0',
+ '70.0.3538.110',
+ '71.0.3578.58',
+ '70.0.3538.109',
+ '72.0.3611.2',
+ '72.0.3611.1',
+ '72.0.3611.0',
+ '71.0.3578.57',
+ '70.0.3538.108',
+ '72.0.3610.2',
+ '71.0.3578.56',
+ '71.0.3578.55',
+ '72.0.3610.1',
+ '72.0.3610.0',
+ '71.0.3578.54',
+ '70.0.3538.107',
+ '71.0.3578.53',
+ '72.0.3609.3',
+ '71.0.3578.52',
+ '72.0.3609.2',
+ '71.0.3578.51',
+ '72.0.3608.5',
+ '72.0.3609.1',
+ '72.0.3609.0',
+ '71.0.3578.50',
+ '70.0.3538.106',
+ '72.0.3608.4',
+ '72.0.3608.3',
+ '72.0.3608.2',
+ '71.0.3578.49',
+ '72.0.3608.1',
+ '72.0.3608.0',
+ '70.0.3538.105',
+ '71.0.3578.48',
+ '72.0.3607.1',
+ '72.0.3607.0',
+ '71.0.3578.47',
+ '70.0.3538.104',
+ '72.0.3606.2',
+ '72.0.3606.1',
+ '72.0.3606.0',
+ '71.0.3578.46',
+ '70.0.3538.103',
+ '70.0.3538.102',
+ '72.0.3605.3',
+ '72.0.3605.2',
+ '72.0.3605.1',
+ '72.0.3605.0',
+ '71.0.3578.45',
+ '70.0.3538.101',
+ '71.0.3578.44',
+ '71.0.3578.43',
+ '70.0.3538.100',
+ '70.0.3538.99',
+ '71.0.3578.42',
+ '72.0.3604.1',
+ '72.0.3604.0',
+ '71.0.3578.41',
+ '70.0.3538.98',
+ '71.0.3578.40',
+ '72.0.3603.2',
+ '72.0.3603.1',
+ '72.0.3603.0',
+ '71.0.3578.39',
+ '70.0.3538.97',
+ '72.0.3602.2',
+ '71.0.3578.38',
+ '71.0.3578.37',
+ '72.0.3602.1',
+ '72.0.3602.0',
+ '71.0.3578.36',
+ '70.0.3538.96',
+ '72.0.3601.1',
+ '72.0.3601.0',
+ '71.0.3578.35',
+ '70.0.3538.95',
+ '72.0.3600.1',
+ '72.0.3600.0',
+ '71.0.3578.34',
+ '70.0.3538.94',
+ '72.0.3599.3',
+ '72.0.3599.2',
+ '72.0.3599.1',
+ '72.0.3599.0',
+ '71.0.3578.33',
+ '70.0.3538.93',
+ '72.0.3598.1',
+ '72.0.3598.0',
+ '71.0.3578.32',
+ '70.0.3538.87',
+ '72.0.3597.1',
+ '72.0.3597.0',
+ '72.0.3596.2',
+ '71.0.3578.31',
+ '70.0.3538.86',
+ '71.0.3578.30',
+ '71.0.3578.29',
+ '72.0.3596.1',
+ '72.0.3596.0',
+ '71.0.3578.28',
+ '70.0.3538.85',
+ '72.0.3595.2',
+ '72.0.3591.3',
+ '72.0.3595.1',
+ '72.0.3595.0',
+ '71.0.3578.27',
+ '70.0.3538.84',
+ '72.0.3594.1',
+ '72.0.3594.0',
+ '71.0.3578.26',
+ '70.0.3538.83',
+ '72.0.3593.2',
+ '72.0.3593.1',
+ '72.0.3593.0',
+ '71.0.3578.25',
+ '70.0.3538.82',
+ '72.0.3589.3',
+ '72.0.3592.2',
+ '72.0.3592.1',
+ '72.0.3592.0',
+ '71.0.3578.24',
+ '72.0.3589.2',
+ '70.0.3538.81',
+ '70.0.3538.80',
+ '72.0.3591.2',
+ '72.0.3591.1',
+ '72.0.3591.0',
+ '71.0.3578.23',
+ '70.0.3538.79',
+ '71.0.3578.22',
+ '72.0.3590.1',
+ '72.0.3590.0',
+ '71.0.3578.21',
+ '70.0.3538.78',
+ '70.0.3538.77',
+ '72.0.3589.1',
+ '72.0.3589.0',
+ '71.0.3578.20',
+ '70.0.3538.76',
+ '71.0.3578.19',
+ '70.0.3538.75',
+ '72.0.3588.1',
+ '72.0.3588.0',
+ '71.0.3578.18',
+ '70.0.3538.74',
+ '72.0.3586.2',
+ '72.0.3587.0',
+ '71.0.3578.17',
+ '70.0.3538.73',
+ '72.0.3586.1',
+ '72.0.3586.0',
+ '71.0.3578.16',
+ '70.0.3538.72',
+ '72.0.3585.1',
+ '72.0.3585.0',
+ '71.0.3578.15',
+ '70.0.3538.71',
+ '71.0.3578.14',
+ '72.0.3584.1',
+ '72.0.3584.0',
+ '71.0.3578.13',
+ '70.0.3538.70',
+ '72.0.3583.2',
+ '71.0.3578.12',
+ '72.0.3583.1',
+ '72.0.3583.0',
+ '71.0.3578.11',
+ '70.0.3538.69',
+ '71.0.3578.10',
+ '72.0.3582.0',
+ '72.0.3581.4',
+ '71.0.3578.9',
+ '70.0.3538.67',
+ '72.0.3581.3',
+ '72.0.3581.2',
+ '72.0.3581.1',
+ '72.0.3581.0',
+ '71.0.3578.8',
+ '70.0.3538.66',
+ '72.0.3580.1',
+ '72.0.3580.0',
+ '71.0.3578.7',
+ '70.0.3538.65',
+ '71.0.3578.6',
+ '72.0.3579.1',
+ '72.0.3579.0',
+ '71.0.3578.5',
+ '70.0.3538.64',
+ '71.0.3578.4',
+ '71.0.3578.3',
+ '71.0.3578.2',
+ '71.0.3578.1',
+ '71.0.3578.0',
+ '70.0.3538.63',
+ '69.0.3497.128',
+ '70.0.3538.62',
+ '70.0.3538.61',
+ '70.0.3538.60',
+ '70.0.3538.59',
+ '71.0.3577.1',
+ '71.0.3577.0',
+ '70.0.3538.58',
+ '69.0.3497.127',
+ '71.0.3576.2',
+ '71.0.3576.1',
+ '71.0.3576.0',
+ '70.0.3538.57',
+ '70.0.3538.56',
+ '71.0.3575.2',
+ '70.0.3538.55',
+ '69.0.3497.126',
+ '70.0.3538.54',
+ '71.0.3575.1',
+ '71.0.3575.0',
+ '71.0.3574.1',
+ '71.0.3574.0',
+ '70.0.3538.53',
+ '69.0.3497.125',
+ '70.0.3538.52',
+ '71.0.3573.1',
+ '71.0.3573.0',
+ '70.0.3538.51',
+ '69.0.3497.124',
+ '71.0.3572.1',
+ '71.0.3572.0',
+ '70.0.3538.50',
+ '69.0.3497.123',
+ '71.0.3571.2',
+ '70.0.3538.49',
+ '69.0.3497.122',
+ '71.0.3571.1',
+ '71.0.3571.0',
+ '70.0.3538.48',
+ '69.0.3497.121',
+ '71.0.3570.1',
+ '71.0.3570.0',
+ '70.0.3538.47',
+ '69.0.3497.120',
+ '71.0.3568.2',
+ '71.0.3569.1',
+ '71.0.3569.0',
+ '70.0.3538.46',
+ '69.0.3497.119',
+ '70.0.3538.45',
+ '71.0.3568.1',
+ '71.0.3568.0',
+ '70.0.3538.44',
+ '69.0.3497.118',
+ '70.0.3538.43',
+ '70.0.3538.42',
+ '71.0.3567.1',
+ '71.0.3567.0',
+ '70.0.3538.41',
+ '69.0.3497.117',
+ '71.0.3566.1',
+ '71.0.3566.0',
+ '70.0.3538.40',
+ '69.0.3497.116',
+ '71.0.3565.1',
+ '71.0.3565.0',
+ '70.0.3538.39',
+ '69.0.3497.115',
+ '71.0.3564.1',
+ '71.0.3564.0',
+ '70.0.3538.38',
+ '69.0.3497.114',
+ '71.0.3563.0',
+ '71.0.3562.2',
+ '70.0.3538.37',
+ '69.0.3497.113',
+ '70.0.3538.36',
+ '70.0.3538.35',
+ '71.0.3562.1',
+ '71.0.3562.0',
+ '70.0.3538.34',
+ '69.0.3497.112',
+ '70.0.3538.33',
+ '71.0.3561.1',
+ '71.0.3561.0',
+ '70.0.3538.32',
+ '69.0.3497.111',
+ '71.0.3559.6',
+ '71.0.3560.1',
+ '71.0.3560.0',
+ '71.0.3559.5',
+ '71.0.3559.4',
+ '70.0.3538.31',
+ '69.0.3497.110',
+ '71.0.3559.3',
+ '70.0.3538.30',
+ '69.0.3497.109',
+ '71.0.3559.2',
+ '71.0.3559.1',
+ '71.0.3559.0',
+ '70.0.3538.29',
+ '69.0.3497.108',
+ '71.0.3558.2',
+ '71.0.3558.1',
+ '71.0.3558.0',
+ '70.0.3538.28',
+ '69.0.3497.107',
+ '71.0.3557.2',
+ '71.0.3557.1',
+ '71.0.3557.0',
+ '70.0.3538.27',
+ '69.0.3497.106',
+ '71.0.3554.4',
+ '70.0.3538.26',
+ '71.0.3556.1',
+ '71.0.3556.0',
+ '70.0.3538.25',
+ '71.0.3554.3',
+ '69.0.3497.105',
+ '71.0.3554.2',
+ '70.0.3538.24',
+ '69.0.3497.104',
+ '71.0.3555.2',
+ '70.0.3538.23',
+ '71.0.3555.1',
+ '71.0.3555.0',
+ '70.0.3538.22',
+ '69.0.3497.103',
+ '71.0.3554.1',
+ '71.0.3554.0',
+ '70.0.3538.21',
+ '69.0.3497.102',
+ '71.0.3553.3',
+ '70.0.3538.20',
+ '69.0.3497.101',
+ '71.0.3553.2',
+ '69.0.3497.100',
+ '71.0.3553.1',
+ '71.0.3553.0',
+ '70.0.3538.19',
+ '69.0.3497.99',
+ '69.0.3497.98',
+ '69.0.3497.97',
+ '71.0.3552.6',
+ '71.0.3552.5',
+ '71.0.3552.4',
+ '71.0.3552.3',
+ '71.0.3552.2',
+ '71.0.3552.1',
+ '71.0.3552.0',
+ '70.0.3538.18',
+ '69.0.3497.96',
+ '71.0.3551.3',
+ '71.0.3551.2',
+ '71.0.3551.1',
+ '71.0.3551.0',
+ '70.0.3538.17',
+ '69.0.3497.95',
+ '71.0.3550.3',
+ '71.0.3550.2',
+ '71.0.3550.1',
+ '71.0.3550.0',
+ '70.0.3538.16',
+ '69.0.3497.94',
+ '71.0.3549.1',
+ '71.0.3549.0',
+ '70.0.3538.15',
+ '69.0.3497.93',
+ '69.0.3497.92',
+ '71.0.3548.1',
+ '71.0.3548.0',
+ '70.0.3538.14',
+ '69.0.3497.91',
+ '71.0.3547.1',
+ '71.0.3547.0',
+ '70.0.3538.13',
+ '69.0.3497.90',
+ '71.0.3546.2',
+ '69.0.3497.89',
+ '71.0.3546.1',
+ '71.0.3546.0',
+ '70.0.3538.12',
+ '69.0.3497.88',
+ '71.0.3545.4',
+ '71.0.3545.3',
+ '71.0.3545.2',
+ '71.0.3545.1',
+ '71.0.3545.0',
+ '70.0.3538.11',
+ '69.0.3497.87',
+ '71.0.3544.5',
+ '71.0.3544.4',
+ '71.0.3544.3',
+ '71.0.3544.2',
+ '71.0.3544.1',
+ '71.0.3544.0',
+ '69.0.3497.86',
+ '70.0.3538.10',
+ '69.0.3497.85',
+ '70.0.3538.9',
+ '69.0.3497.84',
+ '71.0.3543.4',
+ '70.0.3538.8',
+ '71.0.3543.3',
+ '71.0.3543.2',
+ '71.0.3543.1',
+ '71.0.3543.0',
+ '70.0.3538.7',
+ '69.0.3497.83',
+ '71.0.3542.2',
+ '71.0.3542.1',
+ '71.0.3542.0',
+ '70.0.3538.6',
+ '69.0.3497.82',
+ '69.0.3497.81',
+ '71.0.3541.1',
+ '71.0.3541.0',
+ '70.0.3538.5',
+ '69.0.3497.80',
+ '71.0.3540.1',
+ '71.0.3540.0',
+ '70.0.3538.4',
+ '69.0.3497.79',
+ '70.0.3538.3',
+ '71.0.3539.1',
+ '71.0.3539.0',
+ '69.0.3497.78',
+ '68.0.3440.134',
+ '69.0.3497.77',
+ '70.0.3538.2',
+ '70.0.3538.1',
+ '70.0.3538.0',
+ '69.0.3497.76',
+ '68.0.3440.133',
+ '69.0.3497.75',
+ '70.0.3537.2',
+ '70.0.3537.1',
+ '70.0.3537.0',
+ '69.0.3497.74',
+ '68.0.3440.132',
+ '70.0.3536.0',
+ '70.0.3535.5',
+ '70.0.3535.4',
+ '70.0.3535.3',
+ '69.0.3497.73',
+ '68.0.3440.131',
+ '70.0.3532.8',
+ '70.0.3532.7',
+ '69.0.3497.72',
+ '69.0.3497.71',
+ '70.0.3535.2',
+ '70.0.3535.1',
+ '70.0.3535.0',
+ '69.0.3497.70',
+ '68.0.3440.130',
+ '69.0.3497.69',
+ '68.0.3440.129',
+ '70.0.3534.4',
+ '70.0.3534.3',
+ '70.0.3534.2',
+ '70.0.3534.1',
+ '70.0.3534.0',
+ '69.0.3497.68',
+ '68.0.3440.128',
+ '70.0.3533.2',
+ '70.0.3533.1',
+ '70.0.3533.0',
+ '69.0.3497.67',
+ '68.0.3440.127',
+ '70.0.3532.6',
+ '70.0.3532.5',
+ '70.0.3532.4',
+ '69.0.3497.66',
+ '68.0.3440.126',
+ '70.0.3532.3',
+ '70.0.3532.2',
+ '70.0.3532.1',
+ '69.0.3497.60',
+ '69.0.3497.65',
+ '69.0.3497.64',
+ '70.0.3532.0',
+ '70.0.3531.0',
+ '70.0.3530.4',
+ '70.0.3530.3',
+ '70.0.3530.2',
+ '69.0.3497.58',
+ '68.0.3440.125',
+ '69.0.3497.57',
+ '69.0.3497.56',
+ '69.0.3497.55',
+ '69.0.3497.54',
+ '70.0.3530.1',
+ '70.0.3530.0',
+ '69.0.3497.53',
+ '68.0.3440.124',
+ '69.0.3497.52',
+ '70.0.3529.3',
+ '70.0.3529.2',
+ '70.0.3529.1',
+ '70.0.3529.0',
+ '69.0.3497.51',
+ '70.0.3528.4',
+ '68.0.3440.123',
+ '70.0.3528.3',
+ '70.0.3528.2',
+ '70.0.3528.1',
+ '70.0.3528.0',
+ '69.0.3497.50',
+ '68.0.3440.122',
+ '70.0.3527.1',
+ '70.0.3527.0',
+ '69.0.3497.49',
+ '68.0.3440.121',
+ '70.0.3526.1',
+ '70.0.3526.0',
+ '68.0.3440.120',
+ '69.0.3497.48',
+ '69.0.3497.47',
+ '68.0.3440.119',
+ '68.0.3440.118',
+ '70.0.3525.5',
+ '70.0.3525.4',
+ '70.0.3525.3',
+ '68.0.3440.117',
+ '69.0.3497.46',
+ '70.0.3525.2',
+ '70.0.3525.1',
+ '70.0.3525.0',
+ '69.0.3497.45',
+ '68.0.3440.116',
+ '70.0.3524.4',
+ '70.0.3524.3',
+ '69.0.3497.44',
+ '70.0.3524.2',
+ '70.0.3524.1',
+ '70.0.3524.0',
+ '70.0.3523.2',
+ '69.0.3497.43',
+ '68.0.3440.115',
+ '70.0.3505.9',
+ '69.0.3497.42',
+ '70.0.3505.8',
+ '70.0.3523.1',
+ '70.0.3523.0',
+ '69.0.3497.41',
+ '68.0.3440.114',
+ '70.0.3505.7',
+ '69.0.3497.40',
+ '70.0.3522.1',
+ '70.0.3522.0',
+ '70.0.3521.2',
+ '69.0.3497.39',
+ '68.0.3440.113',
+ '70.0.3505.6',
+ '70.0.3521.1',
+ '70.0.3521.0',
+ '69.0.3497.38',
+ '68.0.3440.112',
+ '70.0.3520.1',
+ '70.0.3520.0',
+ '69.0.3497.37',
+ '68.0.3440.111',
+ '70.0.3519.3',
+ '70.0.3519.2',
+ '70.0.3519.1',
+ '70.0.3519.0',
+ '69.0.3497.36',
+ '68.0.3440.110',
+ '70.0.3518.1',
+ '70.0.3518.0',
+ '69.0.3497.35',
+ '69.0.3497.34',
+ '68.0.3440.109',
+ '70.0.3517.1',
+ '70.0.3517.0',
+ '69.0.3497.33',
+ '68.0.3440.108',
+ '69.0.3497.32',
+ '70.0.3516.3',
+ '70.0.3516.2',
+ '70.0.3516.1',
+ '70.0.3516.0',
+ '69.0.3497.31',
+ '68.0.3440.107',
+ '70.0.3515.4',
+ '68.0.3440.106',
+ '70.0.3515.3',
+ '70.0.3515.2',
+ '70.0.3515.1',
+ '70.0.3515.0',
+ '69.0.3497.30',
+ '68.0.3440.105',
+ '68.0.3440.104',
+ '70.0.3514.2',
+ '70.0.3514.1',
+ '70.0.3514.0',
+ '69.0.3497.29',
+ '68.0.3440.103',
+ '70.0.3513.1',
+ '70.0.3513.0',
+ '69.0.3497.28',
+ )
+ return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
+
+
+std_headers = {
+ 'User-Agent': random_user_agent(),
+ 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+ 'Accept-Encoding': 'gzip, deflate',
+ 'Accept-Language': 'en-us,en;q=0.5',
+}
+
+
+USER_AGENTS = {
+ 'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
+}
+
+
+NO_DEFAULT = object()
+
+ENGLISH_MONTH_NAMES = [
+ 'January', 'February', 'March', 'April', 'May', 'June',
+ 'July', 'August', 'September', 'October', 'November', 'December']
+
+MONTH_NAMES = {
+ 'en': ENGLISH_MONTH_NAMES,
+ 'fr': [
+ 'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
+ 'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
+}
+
+KNOWN_EXTENSIONS = (
+ 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
+ 'flv', 'f4v', 'f4a', 'f4b',
+ 'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
+ 'mkv', 'mka', 'mk3d',
+ 'avi', 'divx',
+ 'mov',
+ 'asf', 'wmv', 'wma',
+ '3gp', '3g2',
+ 'mp3',
+ 'flac',
+ 'ape',
+ 'wav',
+ 'f4f', 'f4m', 'm3u8', 'smil')
+
+# needed for sanitizing filenames in restricted mode
+ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
+
+DATE_FORMATS = (
+ '%d %B %Y',
+ '%d %b %Y',
+ '%B %d %Y',
+ '%B %dst %Y',
+ '%B %dnd %Y',
+ '%B %drd %Y',
+ '%B %dth %Y',
+ '%b %d %Y',
+ '%b %dst %Y',
+ '%b %dnd %Y',
+ '%b %drd %Y',
+ '%b %dth %Y',
+ '%b %dst %Y %I:%M',
+ '%b %dnd %Y %I:%M',
+ '%b %drd %Y %I:%M',
+ '%b %dth %Y %I:%M',
+ '%Y %m %d',
+ '%Y-%m-%d',
+ '%Y/%m/%d',
+ '%Y/%m/%d %H:%M',
+ '%Y/%m/%d %H:%M:%S',
+ '%Y-%m-%d %H:%M',
+ '%Y-%m-%d %H:%M:%S',
+ '%Y-%m-%d %H:%M:%S.%f',
+ '%d.%m.%Y %H:%M',
+ '%d.%m.%Y %H.%M',
+ '%Y-%m-%dT%H:%M:%SZ',
+ '%Y-%m-%dT%H:%M:%S.%fZ',
+ '%Y-%m-%dT%H:%M:%S.%f0Z',
+ '%Y-%m-%dT%H:%M:%S',
+ '%Y-%m-%dT%H:%M:%S.%f',
+ '%Y-%m-%dT%H:%M',
+ '%b %d %Y at %H:%M',
+ '%b %d %Y at %H:%M:%S',
+ '%B %d %Y at %H:%M',
+ '%B %d %Y at %H:%M:%S',
+)
+
+DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_DAY_FIRST.extend([
+ '%d-%m-%Y',
+ '%d.%m.%Y',
+ '%d.%m.%y',
+ '%d/%m/%Y',
+ '%d/%m/%y',
+ '%d/%m/%Y %H:%M:%S',
+])
+
+DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
+DATE_FORMATS_MONTH_FIRST.extend([
+ '%m-%d-%Y',
+ '%m.%d.%Y',
+ '%m/%d/%Y',
+ '%m/%d/%y',
+ '%m/%d/%Y %H:%M:%S',
+])
+
+PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
+JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
+
+
+def preferredencoding():
+ """Get preferred encoding.
+
+ Returns the best encoding scheme for the system, based on
+ locale.getpreferredencoding() and some further tweaks.
+ """
+ try:
+ pref = locale.getpreferredencoding()
+ 'TEST'.encode(pref)
+ except Exception:
+ pref = 'UTF-8'
+
+ return pref
+
+
+def write_json_file(obj, fn):
+ """ Encode obj as JSON and write it to fn, atomically if possible """
+
+ fn = encodeFilename(fn)
+ if sys.version_info < (3, 0) and sys.platform != 'win32':
+ encoding = get_filesystem_encoding()
+ # os.path.basename returns a bytes object, but NamedTemporaryFile
+ # will fail if the filename contains non-ASCII characters unless we
+ # use a unicode object
+ path_basename = lambda f: os.path.basename(fn).decode(encoding)
+ # the same for os.path.dirname
+ path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
+ else:
+ path_basename = os.path.basename
+ path_dirname = os.path.dirname
+
+ args = {
+ 'suffix': '.tmp',
+ 'prefix': path_basename(fn) + '.',
+ 'dir': path_dirname(fn),
+ 'delete': False,
+ }
+
+ # In Python 2.x, json.dump expects a bytestream.
+ # In Python 3.x, it writes to a character stream.
+ if sys.version_info < (3, 0):
+ args['mode'] = 'wb'
+ else:
+ args.update({
+ 'mode': 'w',
+ 'encoding': 'utf-8',
+ })
+
+ tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
+
+ try:
+ with tf:
+ json.dump(obj, tf)
+ if sys.platform == 'win32':
+ # Need to remove existing file on Windows, else os.rename raises
+ # WindowsError or FileExistsError.
+ try:
+ os.unlink(fn)
+ except OSError:
+ pass
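+ # temp files are created 0600; emulate open()'s default permissions
+ # by applying the current umask to 0666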
+ try:
+ mask = os.umask(0)
+ os.umask(mask)
+ os.chmod(tf.name, 0o666 & ~mask)
+ except OSError:
+ pass
+ os.rename(tf.name, fn)
+ except Exception:
+ try:
+ os.remove(tf.name)
+ except OSError:
+ pass
+ raise
+
+
+if sys.version_info >= (2, 7):
+ def find_xpath_attr(node, xpath, key, val=None):
+ """ Find the xpath xpath[@key=val] """
+ assert re.match(r'^[a-zA-Z_-]+$', key)
+ expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
+ return node.find(expr)
+else:
+ def find_xpath_attr(node, xpath, key, val=None):
+ for f in node.findall(compat_xpath(xpath)):
+ if key not in f.attrib:
+ continue
+ if val is None or f.attrib.get(key) == val:
+ return f
+ return None
+
+# On Python 2.6 the xml.etree.ElementTree.Element methods don't support
+# the namespace parameter
+
+
+def xpath_with_ns(path, ns_map):
+ components = [c.split(':') for c in path.split('/')]
+ replaced = []
+ for c in components:
+ if len(c) == 1:
+ replaced.append(c[0])
+ else:
+ ns, tag = c
+ replaced.append('{%s}%s' % (ns_map[ns], tag))
+ return '/'.join(replaced)
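+ # e.g. (illustrative) xpath_with_ns('ns0:a/ns0:b', {'ns0': 'ns'}) -> '{ns}a/{ns}b'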
+
+
+def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ def _find_xpath(xpath):
+ return node.find(compat_xpath(xpath))
+
+ if isinstance(xpath, (str, compat_str)):
+ n = _find_xpath(xpath)
+ else:
+ for xp in xpath:
+ n = _find_xpath(xp)
+ if n is not None:
+ break
+
+ if n is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element %s' % name)
+ else:
+ return None
+ return n
+
+
+def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
+ n = xpath_element(node, xpath, name, fatal=fatal, default=default)
+ if n is None or n == default:
+ return n
+ if n.text is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = xpath if name is None else name
+ raise ExtractorError('Could not find XML element\'s text %s' % name)
+ else:
+ return None
+ return n.text
+
+
+def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
+ n = find_xpath_attr(node, xpath, key)
+ if n is None:
+ if default is not NO_DEFAULT:
+ return default
+ elif fatal:
+ name = '%s[@%s]' % (xpath, key) if name is None else name
+ raise ExtractorError('Could not find XML attribute %s' % name)
+ else:
+ return None
+ return n.attrib[key]
+
+
+def get_element_by_id(id, html):
+ """Return the content of the tag with the specified ID in the passed HTML document"""
+ return get_element_by_attribute('id', id, html)
+
+
+def get_element_by_class(class_name, html):
+ """Return the content of the first tag with the specified class in the passed HTML document"""
+ retval = get_elements_by_class(class_name, html)
+ return retval[0] if retval else None
+
+
+def get_element_by_attribute(attribute, value, html, escape_value=True):
+ retval = get_elements_by_attribute(attribute, value, html, escape_value)
+ return retval[0] if retval else None
+
+
+def get_elements_by_class(class_name, html):
+ """Return the content of all tags with the specified class in the passed HTML document as a list"""
+ return get_elements_by_attribute(
+ 'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
+ html, escape_value=False)
+
+
+def get_elements_by_attribute(attribute, value, html, escape_value=True):
+ """Return the content of the tag with the specified attribute in the passed HTML document"""
+
+ value = re.escape(value) if escape_value else value
+
+ retlist = []
+ for m in re.finditer(r'''(?xs)
+ <([a-zA-Z0-9:._-]+)
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s+%s=['"]?%s['"]?
+ (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
+ \s*>
+ (?P<content>.*?)
+ </\1>
+ ''' % (re.escape(attribute), value), html):
+ res = m.group('content')
+
+ if res.startswith('"') or res.startswith("'"):
+ res = res[1:-1]
+
+ retlist.append(unescapeHTML(res))
+
+ return retlist
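+
+ # Illustrative usage (sketch; the markup is made up). Matching is on whole
+ # class tokens, so a partial word does not match:
+ #   get_element_by_class('title', '<span class="video title">Foo</span>')  -> 'Foo'
+ #   get_element_by_class('tit', '<span class="video title">Foo</span>')    -> None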
+
+
+class HTMLAttributeParser(compat_HTMLParser):
+ """Trivial HTML parser to gather the attributes for a single element"""
+ def __init__(self):
+ self.attrs = {}
+ compat_HTMLParser.__init__(self)
+
+ def handle_starttag(self, tag, attrs):
+ self.attrs = dict(attrs)
+
+
+def extract_attributes(html_element):
+ """Given a string for an HTML element such as
+ <el
+ a="foo" B="bar" c="&98;az" d=boz
+ empty= noval entity="&amp;"
+ sq='"' dq="'"
+ >
+ Decode and return a dictionary of attributes.
+ {
+ 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
+ 'empty': '', 'noval': None, 'entity': '&',
+ 'sq': '"', 'dq': '\''
+ }.
+ NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
+ but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
+ """
+ parser = HTMLAttributeParser()
+ try:
+ parser.feed(html_element)
+ parser.close()
+ # Older Python may throw HTMLParseError in case of malformed HTML
+ except compat_HTMLParseError:
+ pass
+ return parser.attrs
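+
+ # Illustrative usage (sketch; the element is made up):
+ #   extract_attributes('<a href="/watch?v=abc" class="yt-link" data-id=42>')
+ #   -> {'href': '/watch?v=abc', 'class': 'yt-link', 'data-id': '42'}
+ # Per the docstring above, valueless attributes map to None and entities are decoded.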
+
+
+def clean_html(html):
+ """Clean an HTML snippet into a readable string"""
+
+ if html is None: # Convenience for sanitizing descriptions etc.
+ return html
+
+ # Newline vs <br />
+ html = html.replace('\n', ' ')
+ html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
+ html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
+ # Strip html tags
+ html = re.sub('<.*?>', '', html)
+ # Replace html entities
+ html = unescapeHTML(html)
+ return html.strip()
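+
+ # Illustrative usage (sketch):
+ #   clean_html('First line<br/>Second &amp; last<p>')
+ #   -> 'First line\nSecond & last'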
+
+
+def sanitize_open(filename, open_mode):
+ """Try to open the given filename, and slightly tweak it if this fails.
+
+ Attempts to open the given filename. If this fails, it tries to change
+ the filename slightly, step by step, until it's either able to open it
+ or it fails and raises a final exception, like the standard open()
+ function.
+
+ It returns the tuple (stream, definitive_file_name).
+ """
+ try:
+ if filename == '-':
+ if sys.platform == 'win32':
+ import msvcrt
+ msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
+ return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
+ stream = open(encodeFilename(filename), open_mode)
+ return (stream, filename)
+ except (IOError, OSError) as err:
+ if err.errno in (errno.EACCES,):
+ raise
+
+ # In case of error, try to remove win32 forbidden chars
+ alt_filename = sanitize_path(filename)
+ if alt_filename == filename:
+ raise
+ else:
+ # An exception here should be caught in the caller
+ stream = open(encodeFilename(alt_filename), open_mode)
+ return (stream, alt_filename)
+
+
+def timeconvert(timestr):
+ """Convert RFC 2822 defined time string into system timestamp"""
+ timestamp = None
+ timetuple = email.utils.parsedate_tz(timestr)
+ if timetuple is not None:
+ timestamp = email.utils.mktime_tz(timetuple)
+ return timestamp
+
+
+def sanitize_filename(s, restricted=False, is_id=False):
+ """Sanitizes a string so it could be used as part of a filename.
+ If restricted is set, use a stricter subset of allowed characters.
+ Set is_id if this is not an arbitrary string, but an ID that should be kept
+ if possible.
+ """
+ def replace_insane(char):
+ if restricted and char in ACCENT_CHARS:
+ return ACCENT_CHARS[char]
+ if char == '?' or ord(char) < 32 or ord(char) == 127:
+ return ''
+ elif char == '"':
+ return '' if restricted else '\''
+ elif char == ':':
+ return '_-' if restricted else ' -'
+ elif char in '\\/|*<>':
+ return '_'
+ if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
+ return '_'
+ if restricted and ord(char) > 127:
+ return '_'
+ return char
+
+ # Handle timestamps
+ s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
+ result = ''.join(map(replace_insane, s))
+ if not is_id:
+ while '__' in result:
+ result = result.replace('__', '_')
+ result = result.strip('_')
+ # Common case of "Foreign band name - English song title"
+ if restricted and result.startswith('-_'):
+ result = result[2:]
+ if result.startswith('-'):
+ result = '_' + result[len('-'):]
+ result = result.lstrip('.')
+ if not result:
+ result = '_'
+ return result
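+
+ # Illustrative usage (sketch):
+ #   sanitize_filename('Foo: Bar')                   -> 'Foo - Bar'
+ #   sanitize_filename('Foo: Bar', restricted=True)  -> 'Foo_-_Bar'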
+
+
+def sanitize_path(s):
+ """Sanitizes and normalizes path on Windows"""
+ if sys.platform != 'win32':
+ return s
+ drive_or_unc, _ = os.path.splitdrive(s)
+ if sys.version_info < (2, 7) and not drive_or_unc:
+ drive_or_unc, _ = os.path.splitunc(s)
+ norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
+ if drive_or_unc:
+ norm_path.pop(0)
+ sanitized_path = [
+ path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
+ for path_part in norm_path]
+ if drive_or_unc:
+ sanitized_path.insert(0, drive_or_unc + os.path.sep)
+ return os.path.join(*sanitized_path)
+
+
+def sanitize_url(url):
+ # Prepend protocol-less URLs with an `http:` scheme to reduce the number
+ # of unwanted failures due to a missing protocol
+ if url.startswith('//'):
+ return 'http:%s' % url
+ # Fix some common typos seen so far
+ COMMON_TYPOS = (
+ # https://github.com/ytdl-org/youtube-dl/issues/15649
+ (r'^httpss://', r'https://'),
+ # https://bx1.be/lives/direct-tv/
+ (r'^rmtp([es]?)://', r'rtmp\1://'),
+ )
+ for mistake, fixup in COMMON_TYPOS:
+ if re.match(mistake, url):
+ return re.sub(mistake, fixup, url)
+ return url
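+
+ # Illustrative usage (sketch; hosts are made up):
+ #   sanitize_url('//cdn.example.com/v.mp4')  -> 'http://cdn.example.com/v.mp4'
+ #   sanitize_url('httpss://example.com/v')   -> 'https://example.com/v'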
+
+
+def sanitized_Request(url, *args, **kwargs):
+ return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
+
+
+def expand_path(s):
+ """Expand shell variables and ~"""
+ return os.path.expandvars(compat_expanduser(s))
+
+
+def orderedSet(iterable):
+ """ Remove all duplicates from the input iterable """
+ res = []
+ for el in iterable:
+ if el not in res:
+ res.append(el)
+ return res
+
+
+def _htmlentity_transform(entity_with_semicolon):
+ """Transforms an HTML entity to a character."""
+ entity = entity_with_semicolon[:-1]
+
+ # Known non-numeric HTML entity
+ if entity in compat_html_entities.name2codepoint:
+ return compat_chr(compat_html_entities.name2codepoint[entity])
+
+ # TODO: HTML5 allows entities without a semicolon. For example,
+ # '&Eacuteric' should be decoded as 'Éric'.
+ if entity_with_semicolon in compat_html_entities_html5:
+ return compat_html_entities_html5[entity_with_semicolon]
+
+ mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
+ if mobj is not None:
+ numstr = mobj.group(1)
+ if numstr.startswith('x'):
+ base = 16
+ numstr = '0%s' % numstr
+ else:
+ base = 10
+ # See https://github.com/ytdl-org/youtube-dl/issues/7518
+ try:
+ return compat_chr(int(numstr, base))
+ except ValueError:
+ pass
+
+ # Unknown entity in name, return its literal representation
+ return '&%s;' % entity
+
+
+def unescapeHTML(s):
+ if s is None:
+ return None
+ assert type(s) == compat_str
+
+ return re.sub(
+ r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+
+
+def get_subprocess_encoding():
+ if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ # For subprocess calls, encode with locale encoding
+ # Refer to http://stackoverflow.com/a/9951851/35070
+ encoding = preferredencoding()
+ else:
+ encoding = sys.getfilesystemencoding()
+ if encoding is None:
+ encoding = 'utf-8'
+ return encoding
+
+
+def encodeFilename(s, for_subprocess=False):
+ """
+ @param s The name of the file
+ """
+
+ assert type(s) == compat_str
+
+ # Python 3 has a Unicode API
+ if sys.version_info >= (3, 0):
+ return s
+
+ # Pass '' directly to use Unicode APIs on Windows 2000 and up
+ # (Detecting Windows NT 4 is tricky because 'major >= 4' would
+ # match Windows 9x series as well. Besides, NT 4 is obsolete.)
+ if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
+ return s
+
+ # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
+ if sys.platform.startswith('java'):
+ return s
+
+ return s.encode(get_subprocess_encoding(), 'ignore')
+
+
+def decodeFilename(b, for_subprocess=False):
+
+ if sys.version_info >= (3, 0):
+ return b
+
+ if not isinstance(b, bytes):
+ return b
+
+ return b.decode(get_subprocess_encoding(), 'ignore')
+
+
+def encodeArgument(s):
+ if not isinstance(s, compat_str):
+ # Legacy code that uses byte strings
+ # Uncomment the following line after fixing all post processors
+ # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
+ s = s.decode('ascii')
+ return encodeFilename(s, True)
+
+
+def decodeArgument(b):
+ return decodeFilename(b, True)
+
+
+def decodeOption(optval):
+ if optval is None:
+ return optval
+ if isinstance(optval, bytes):
+ optval = optval.decode(preferredencoding())
+
+ assert isinstance(optval, compat_str)
+ return optval
+
+
+def formatSeconds(secs):
+ if secs > 3600:
+ return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
+ elif secs > 60:
+ return '%d:%02d' % (secs // 60, secs % 60)
+ else:
+ return '%d' % secs
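+
+ # Illustrative usage (sketch):
+ #   formatSeconds(3661) -> '1:01:01'
+ #   formatSeconds(75)   -> '1:15'
+ #   formatSeconds(42)   -> '42'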
+
+
+def make_HTTPS_handler(params, **kwargs):
+ opts_no_check_certificate = params.get('nocheckcertificate', False)
+ if hasattr(ssl, 'create_default_context'): # Python >= 3.4 or 2.7.9
+ context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
+ if opts_no_check_certificate:
+ context.check_hostname = False
+ context.verify_mode = ssl.CERT_NONE
+ try:
+ return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+ except TypeError:
+ # Python 2.7.8
+ # (create_default_context present but HTTPSHandler has no context=)
+ pass
+
+ if sys.version_info < (3, 2):
+ return YoutubeDLHTTPSHandler(params, **kwargs)
+ else: # Python < 3.4
+ context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
+ context.verify_mode = (ssl.CERT_NONE
+ if opts_no_check_certificate
+ else ssl.CERT_REQUIRED)
+ context.set_default_verify_paths()
+ return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
+
+
+def bug_reports_message():
+ if ytdl_is_updateable():
+ update_cmd = 'type youtube-dlc -U to update'
+ else:
+ update_cmd = 'see https://yt-dl.org/update on how to update'
+ msg = '; please report this issue on https://yt-dl.org/bug .'
+ msg += ' Make sure you are using the latest version; %s.' % update_cmd
+ msg += ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.'
+ return msg
+
+
+class YoutubeDLError(Exception):
+ """Base exception for YoutubeDL errors."""
+ pass
+
+
+class ExtractorError(YoutubeDLError):
+ """Error during info extraction."""
+
+ def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
+ """ tb, if given, is the original traceback (so that it can be printed out).
+ If expected is set, this is a normal error message and most likely not a bug in youtube-dlc.
+ """
+
+ if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
+ expected = True
+ if video_id is not None:
+ msg = video_id + ': ' + msg
+ if cause:
+ msg += ' (caused by %r)' % cause
+ if not expected:
+ msg += bug_reports_message()
+ super(ExtractorError, self).__init__(msg)
+
+ self.traceback = tb
+ self.exc_info = sys.exc_info() # preserve original exception
+ self.cause = cause
+ self.video_id = video_id
+
+ def format_traceback(self):
+ if self.traceback is None:
+ return None
+ return ''.join(traceback.format_tb(self.traceback))
+
+
+class UnsupportedError(ExtractorError):
+ def __init__(self, url):
+ super(UnsupportedError, self).__init__(
+ 'Unsupported URL: %s' % url, expected=True)
+ self.url = url
+
+
+class RegexNotFoundError(ExtractorError):
+ """Error when a regex didn't match"""
+ pass
+
+
+class GeoRestrictedError(ExtractorError):
+ """Geographic restriction Error exception.
+
+ This exception may be thrown when a video is not available from your
+ geographic location due to geographic restrictions imposed by a website.
+ """
+ def __init__(self, msg, countries=None):
+ super(GeoRestrictedError, self).__init__(msg, expected=True)
+ self.msg = msg
+ self.countries = countries
+
+
+class DownloadError(YoutubeDLError):
+ """Download Error exception.
+
+ This exception may be thrown by FileDownloader objects if they are not
+ configured to continue on errors. They will contain the appropriate
+ error message.
+ """
+
+ def __init__(self, msg, exc_info=None):
+ """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
+ super(DownloadError, self).__init__(msg)
+ self.exc_info = exc_info
+
+
+class SameFileError(YoutubeDLError):
+ """Same File exception.
+
+ This exception will be thrown by FileDownloader objects if they detect
+ multiple files would have to be downloaded to the same file on disk.
+ """
+ pass
+
+
+class PostProcessingError(YoutubeDLError):
+ """Post Processing exception.
+
+ This exception may be raised by PostProcessor's .run() method to
+ indicate an error in the postprocessing task.
+ """
+
+ def __init__(self, msg):
+ super(PostProcessingError, self).__init__(msg)
+ self.msg = msg
+
+
+class MaxDownloadsReached(YoutubeDLError):
+ """ --max-downloads limit has been reached. """
+ pass
+
+
+class UnavailableVideoError(YoutubeDLError):
+ """Unavailable Format exception.
+
+ This exception will be thrown when a video is requested
+ in a format that is not available for that video.
+ """
+ pass
+
+
+class ContentTooShortError(YoutubeDLError):
+ """Content Too Short exception.
+
+ This exception may be raised by FileDownloader objects when a file they
+ download is too small for what the server announced first, indicating
+ the connection was probably interrupted.
+ """
+
+ def __init__(self, downloaded, expected):
+ super(ContentTooShortError, self).__init__(
+ 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
+ )
+ # Both in bytes
+ self.downloaded = downloaded
+ self.expected = expected
+
+
+class XAttrMetadataError(YoutubeDLError):
+ def __init__(self, code=None, msg='Unknown error'):
+ super(XAttrMetadataError, self).__init__(msg)
+ self.code = code
+ self.msg = msg
+
+ # Parsing code and msg
+ if (self.code in (errno.ENOSPC, errno.EDQUOT)
+ or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
+ self.reason = 'NO_SPACE'
+ elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
+ self.reason = 'VALUE_TOO_LONG'
+ else:
+ self.reason = 'NOT_SUPPORTED'
+
+
+class XAttrUnavailableError(YoutubeDLError):
+ pass
+
+
+def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
+ # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
+ # expected HTTP responses to meet HTTP/1.0 or later (see also
+ # https://github.com/ytdl-org/youtube-dl/issues/6727)
+ if sys.version_info < (3, 0):
+ kwargs['strict'] = True
+ hc = http_class(*args, **compat_kwargs(kwargs))
+ source_address = ydl_handler._params.get('source_address')
+
+ if source_address is not None:
+ # This is to work around _create_connection() from socket, which tries all
+ # address data from getaddrinfo(), including IPv6. This filters the result
+ # from getaddrinfo() based on the source_address value.
+ # This is based on the CPython socket.create_connection() function.
+ # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
+ def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
+ host, port = address
+ err = None
+ addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
+ af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
+ ip_addrs = [addr for addr in addrs if addr[0] == af]
+ if addrs and not ip_addrs:
+ ip_version = 'v4' if af == socket.AF_INET else 'v6'
+ raise socket.error(
+ "No remote IP%s addresses available for connect, can't use '%s' as source address"
+ % (ip_version, source_address[0]))
+ for res in ip_addrs:
+ af, socktype, proto, canonname, sa = res
+ sock = None
+ try:
+ sock = socket.socket(af, socktype, proto)
+ if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
+ sock.settimeout(timeout)
+ sock.bind(source_address)
+ sock.connect(sa)
+ err = None # Explicitly break reference cycle
+ return sock
+ except socket.error as _:
+ err = _
+ if sock is not None:
+ sock.close()
+ if err is not None:
+ raise err
+ else:
+ raise socket.error('getaddrinfo returns an empty list')
+ if hasattr(hc, '_create_connection'):
+ hc._create_connection = _create_connection
+ sa = (source_address, 0)
+ if hasattr(hc, 'source_address'): # Python 2.7+
+ hc.source_address = sa
+ else: # Python 2.6
+ def _hc_connect(self, *args, **kwargs):
+ sock = _create_connection(
+ (self.host, self.port), self.timeout, sa)
+ if is_https:
+ self.sock = ssl.wrap_socket(
+ sock, self.key_file, self.cert_file,
+ ssl_version=ssl.PROTOCOL_TLSv1)
+ else:
+ self.sock = sock
+ hc.connect = functools.partial(_hc_connect, hc)
+
+ return hc
+
+
+def handle_youtubedl_headers(headers):
+ filtered_headers = headers
+
+ if 'Youtubedl-no-compression' in filtered_headers:
+ filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
+ del filtered_headers['Youtubedl-no-compression']
+
+ return filtered_headers
+
+
+class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+ """Handler for HTTP requests and responses.
+
+ This class, when installed with an OpenerDirector, automatically adds
+ the standard headers to every HTTP request and handles gzipped and
+ deflated responses from web servers. If compression is to be avoided in
+ a particular request, the original request in the program code only has
+ to include the HTTP header "Youtubedl-no-compression", which will be
+ removed before making the real request.
+
+ Part of this code was copied from:
+
+ http://techknack.net/python-urllib2-handlers/
+
+ Andrew Rowls, the author of that code, agreed to release it to the
+ public domain.
+ """
+
+ def __init__(self, params, *args, **kwargs):
+ compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
+ self._params = params
+
+ def http_open(self, req):
+ conn_class = compat_http_client.HTTPConnection
+
+ socks_proxy = req.headers.get('Ytdl-socks-proxy')
+ if socks_proxy:
+ conn_class = make_socks_conn_class(conn_class, socks_proxy)
+ del req.headers['Ytdl-socks-proxy']
+
+ return self.do_open(functools.partial(
+ _create_http_connection, self, conn_class, False),
+ req)
+
+ @staticmethod
+ def deflate(data):
+ try:
+ return zlib.decompress(data, -zlib.MAX_WBITS)
+ except zlib.error:
+ return zlib.decompress(data)
+
+ def http_request(self, req):
+ # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this
+ # is not always respected by websites, and some hand out URLs with non-percent-encoded
+ # non-ASCII characters (see telemb.py, ard.py [#3412]).
+ # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991),
+ # so to work around the issue we replace the request's original URL with a
+ # percent-encoded one
+ # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
+ # the code of this workaround has been moved here from YoutubeDL.urlopen()
+ url = req.get_full_url()
+ url_escaped = escape_url(url)
+
+ # Substitute URL if any change after escaping
+ if url != url_escaped:
+ req = update_Request(req, url=url_escaped)
+
+ for h, v in std_headers.items():
+ # Capitalization is needed here because urllib capitalizes the dict keys
+ # due to Python bug 2275: http://bugs.python.org/issue2275
+ if h.capitalize() not in req.headers:
+ req.add_header(h, v)
+
+ req.headers = handle_youtubedl_headers(req.headers)
+
+ if sys.version_info < (2, 7) and '#' in req.get_full_url():
+ # Python 2.6 is brain-dead when it comes to fragments
+ req._Request__original = req._Request__original.partition('#')[0]
+ req._Request__r_type = req._Request__r_type.partition('#')[0]
+
+ return req
+
+ def http_response(self, req, resp):
+ old_resp = resp
+ # gzip
+ if resp.headers.get('Content-encoding', '') == 'gzip':
+ content = resp.read()
+ gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
+ try:
+ uncompressed = io.BytesIO(gz.read())
+ except IOError as original_ioerror:
+ # There may be junk at the end of the file
+ # See http://stackoverflow.com/q/4928560/35070 for details
+ for i in range(1, 1024):
+ try:
+ gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
+ uncompressed = io.BytesIO(gz.read())
+ except IOError:
+ continue
+ break
+ else:
+ raise original_ioerror
+ resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ del resp.headers['Content-encoding']
+ # deflate
+ if resp.headers.get('Content-encoding', '') == 'deflate':
+ gz = io.BytesIO(self.deflate(resp.read()))
+ resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
+ resp.msg = old_resp.msg
+ del resp.headers['Content-encoding']
+ # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
+ # https://github.com/ytdl-org/youtube-dl/issues/6457).
+ if 300 <= resp.code < 400:
+ location = resp.headers.get('Location')
+ if location:
+ # Per RFC 2616 the default charset is iso-8859-1, which Python 3 respects
+ if sys.version_info >= (3, 0):
+ location = location.encode('iso-8859-1').decode('utf-8')
+ else:
+ location = location.decode('utf-8')
+ location_escaped = escape_url(location)
+ if location != location_escaped:
+ del resp.headers['Location']
+ if sys.version_info < (3, 0):
+ location_escaped = location_escaped.encode('utf-8')
+ resp.headers['Location'] = location_escaped
+ return resp
+
+ https_request = http_request
+ https_response = http_response
+
+
+def make_socks_conn_class(base_class, socks_proxy):
+ assert issubclass(base_class, (
+ compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
+
+ url_components = compat_urlparse.urlparse(socks_proxy)
+ if url_components.scheme.lower() == 'socks5':
+ socks_type = ProxyType.SOCKS5
+ elif url_components.scheme.lower() in ('socks', 'socks4'):
+ socks_type = ProxyType.SOCKS4
+ elif url_components.scheme.lower() == 'socks4a':
+ socks_type = ProxyType.SOCKS4A
+
+ def unquote_if_non_empty(s):
+ if not s:
+ return s
+ return compat_urllib_parse_unquote_plus(s)
+
+ proxy_args = (
+ socks_type,
+ url_components.hostname, url_components.port or 1080,
+ True, # Remote DNS
+ unquote_if_non_empty(url_components.username),
+ unquote_if_non_empty(url_components.password),
+ )
+
+ class SocksConnection(base_class):
+ def connect(self):
+ self.sock = sockssocket()
+ self.sock.setproxy(*proxy_args)
+ if type(self.timeout) in (int, float):
+ self.sock.settimeout(self.timeout)
+ self.sock.connect((self.host, self.port))
+
+ if isinstance(self, compat_http_client.HTTPSConnection):
+ if hasattr(self, '_context'): # Python > 2.6
+ self.sock = self._context.wrap_socket(
+ self.sock, server_hostname=self.host)
+ else:
+ self.sock = ssl.wrap_socket(self.sock)
+
+ return SocksConnection
+
+
+class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
+ def __init__(self, params, https_conn_class=None, *args, **kwargs):
+ compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
+ self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
+ self._params = params
+
+ def https_open(self, req):
+ kwargs = {}
+ conn_class = self._https_conn_class
+
+ if hasattr(self, '_context'): # python > 2.6
+ kwargs['context'] = self._context
+ if hasattr(self, '_check_hostname'): # python 3.x
+ kwargs['check_hostname'] = self._check_hostname
+
+ socks_proxy = req.headers.get('Ytdl-socks-proxy')
+ if socks_proxy:
+ conn_class = make_socks_conn_class(conn_class, socks_proxy)
+ del req.headers['Ytdl-socks-proxy']
+
+ return self.do_open(functools.partial(
+ _create_http_connection, self, conn_class, True),
+ req, **kwargs)
+
+
+class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
+ """
+ See [1] for cookie file format.
+
+ 1. https://curl.haxx.se/docs/http-cookies.html
+ """
+ _HTTPONLY_PREFIX = '#HttpOnly_'
+ _ENTRY_LEN = 7
+ _HEADER = '''# Netscape HTTP Cookie File
+# This file is generated by youtube-dlc. Do not edit.
+
+'''
+ _CookieFileEntry = collections.namedtuple(
+ 'CookieFileEntry',
+ ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
+
+ def save(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """
+ Save cookies to a file.
+
+ Most of the code is taken from CPython 3.8 and slightly adapted
+ to support cookie files with UTF-8 in both python 2 and 3.
+ """
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
+ # Store session cookies with `expires` set to 0 instead of an empty
+ # string
+ for cookie in self:
+ if cookie.expires is None:
+ cookie.expires = 0
+
+ with io.open(filename, 'w', encoding='utf-8') as f:
+ f.write(self._HEADER)
+ now = time.time()
+ for cookie in self:
+ if not ignore_discard and cookie.discard:
+ continue
+ if not ignore_expires and cookie.is_expired(now):
+ continue
+ if cookie.secure:
+ secure = 'TRUE'
+ else:
+ secure = 'FALSE'
+ if cookie.domain.startswith('.'):
+ initial_dot = 'TRUE'
+ else:
+ initial_dot = 'FALSE'
+ if cookie.expires is not None:
+ expires = compat_str(cookie.expires)
+ else:
+ expires = ''
+ if cookie.value is None:
+ # cookies.txt regards 'Set-Cookie: foo' as a cookie
+ # with no name, whereas http.cookiejar regards it as a
+ # cookie with no value.
+ name = ''
+ value = cookie.name
+ else:
+ name = cookie.name
+ value = cookie.value
+ f.write(
+ '\t'.join([cookie.domain, initial_dot, cookie.path,
+ secure, expires, name, value]) + '\n')
+
+ def load(self, filename=None, ignore_discard=False, ignore_expires=False):
+ """Load cookies from a file."""
+ if filename is None:
+ if self.filename is not None:
+ filename = self.filename
+ else:
+ raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
+
+ def prepare_line(line):
+ if line.startswith(self._HTTPONLY_PREFIX):
+ line = line[len(self._HTTPONLY_PREFIX):]
+ # comments and empty lines are fine
+ if line.startswith('#') or not line.strip():
+ return line
+ cookie_list = line.split('\t')
+ if len(cookie_list) != self._ENTRY_LEN:
+ raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
+ cookie = self._CookieFileEntry(*cookie_list)
+ if cookie.expires_at and not cookie.expires_at.isdigit():
+ raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
+ return line
+
+ cf = io.StringIO()
+ with io.open(filename, encoding='utf-8') as f:
+ for line in f:
+ try:
+ cf.write(prepare_line(line))
+ except compat_cookiejar.LoadError as e:
+ write_string(
+ 'WARNING: skipping cookie file entry due to %s: %r\n'
+ % (e, line), sys.stderr)
+ continue
+ cf.seek(0)
+ self._really_load(cf, filename, ignore_discard, ignore_expires)
+ # Session cookies are denoted by the `expires` field set to either
+ # an empty string or 0. MozillaCookieJar only recognizes the former
+ # (see [1]), so we need to force the latter to be recognized as session
+ # cookies on our own.
+ # Session cookies may be important for cookie-based authentication,
+ # e.g. usually, when a user does not tick the 'Remember me' check box while
+ # logging in on a site, some important cookies are stored as session
+ # cookies, so failing to recognize them will result in a failed login.
+ # 1. https://bugs.python.org/issue17164
+ for cookie in self:
+ # Treat `expires=0` cookies as session cookies
+ if cookie.expires == 0:
+ cookie.expires = None
+ cookie.discard = True
+
+
+class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
+ def __init__(self, cookiejar=None):
+ compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
+
+ def http_response(self, request, response):
+ # Python 2 will choke on the next HTTP request in a row if there are
+ # non-ASCII characters in the Set-Cookie HTTP header of the last response
+ # (see https://github.com/ytdl-org/youtube-dl/issues/6769).
+ # In order to at least prevent crashing, we percent-encode the Set-Cookie
+ # header before HTTPCookieProcessor starts processing it.
+ # if sys.version_info < (3, 0) and response.headers:
+ # for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
+ # set_cookie = response.headers.get(set_cookie_header)
+ # if set_cookie:
+ # set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
+ # if set_cookie != set_cookie_escaped:
+ # del response.headers[set_cookie_header]
+ # response.headers[set_cookie_header] = set_cookie_escaped
+ return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
+
+ https_request = compat_urllib_request.HTTPCookieProcessor.http_request
+ https_response = http_response
+
+
+class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
+ if sys.version_info[0] < 3:
+ def redirect_request(self, req, fp, code, msg, headers, newurl):
+ # On Python 2, urlh.geturl() may sometimes return the redirect URL
+ # as a byte string instead of unicode. This workaround forces
+ # it to always return unicode.
+ return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl))
+
+
+def extract_timezone(date_str):
+ m = re.search(
+ r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)',
+ date_str)
+ if not m:
+ timezone = datetime.timedelta()
+ else:
+ date_str = date_str[:-len(m.group('tz'))]
+ if not m.group('sign'):
+ timezone = datetime.timedelta()
+ else:
+ sign = 1 if m.group('sign') == '+' else -1
+ timezone = datetime.timedelta(
+ hours=sign * int(m.group('hours')),
+ minutes=sign * int(m.group('minutes')))
+ return timezone, date_str
+
+
+def parse_iso8601(date_str, delimiter='T', timezone=None):
+ """ Return a UNIX timestamp from the given date """
+
+ if date_str is None:
+ return None
+
+ date_str = re.sub(r'\.[0-9]+', '', date_str)
+
+ if timezone is None:
+ timezone, date_str = extract_timezone(date_str)
+
+ try:
+ date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
+ dt = datetime.datetime.strptime(date_str, date_format) - timezone
+ return calendar.timegm(dt.timetuple())
+ except ValueError:
+ pass
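+
+ # Illustrative usage (sketch):
+ #   parse_iso8601('2020-01-01T00:00:00Z')      -> 1577836800
+ #   parse_iso8601('2020-01-01T01:00:00+01:00') -> 1577836800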
+
+
+def date_formats(day_first=True):
+ return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
+
+
+def unified_strdate(date_str, day_first=True):
+ """Return a string with the date in the format YYYYMMDD"""
+
+ if date_str is None:
+ return None
+ upload_date = None
+ # Replace commas
+ date_str = date_str.replace(',', ' ')
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+ _, date_str = extract_timezone(date_str)
+
+ for expression in date_formats(day_first):
+ try:
+ upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
+ except ValueError:
+ pass
+ if upload_date is None:
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ try:
+ upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
+ except ValueError:
+ pass
+ if upload_date is not None:
+ return compat_str(upload_date)
+
+
+def unified_timestamp(date_str, day_first=True):
+ if date_str is None:
+ return None
+
+ date_str = re.sub(r'[,|]', '', date_str)
+
+ pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
+ timezone, date_str = extract_timezone(date_str)
+
+ # Remove AM/PM + timezone
+ date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
+
+ # Remove unrecognized timezones from ISO 8601 alike timestamps
+ m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
+ if m:
+ date_str = date_str[:-len(m.group('tz'))]
+
+ # Python only supports microseconds, so remove nanoseconds
+ m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
+ if m:
+ date_str = m.group(1)
+
+ for expression in date_formats(day_first):
+ try:
+ dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
+ return calendar.timegm(dt.timetuple())
+ except ValueError:
+ pass
+ timetuple = email.utils.parsedate_tz(date_str)
+ if timetuple:
+ return calendar.timegm(timetuple) + pm_delta * 3600
+
+
+def determine_ext(url, default_ext='unknown_video'):
+ if url is None or '.' not in url:
+ return default_ext
+ guess = url.partition('?')[0].rpartition('.')[2]
+ if re.match(r'^[A-Za-z0-9]+$', guess):
+ return guess
+ # Try to extract ext from URLs like http://example.com/foo/bar.mp4/?download
+ elif guess.rstrip('/') in KNOWN_EXTENSIONS:
+ return guess.rstrip('/')
+ else:
+ return default_ext
+
+
+def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
+ return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
+
+
+def date_from_str(date_str):
+ """
+ Return a date object from a string in the format YYYYMMDD, 'yesterday', or
+ (now|today)[+-][0-9](day|week|month|year)(s)?"""
+ today = datetime.date.today()
+ if date_str in ('now', 'today'):
+ return today
+ if date_str == 'yesterday':
+ return today - datetime.timedelta(days=1)
+ match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
+ if match is not None:
+ sign = match.group('sign')
+ time = int(match.group('time'))
+ if sign == '-':
+ time = -time
+ unit = match.group('unit')
+ # A bad approximation?
+ if unit == 'month':
+ unit = 'day'
+ time *= 30
+ elif unit == 'year':
+ unit = 'day'
+ time *= 365
+ unit += 's'
+ delta = datetime.timedelta(**{unit: time})
+ return today + delta
+ return datetime.datetime.strptime(date_str, '%Y%m%d').date()
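+
+ # Illustrative usage (sketch):
+ #   date_from_str('now-1week') -> the date 7 days before today
+ #   date_from_str('20200101')  -> datetime.date(2020, 1, 1)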
+
+
+def hyphenate_date(date_str):
+ """
+ Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
+ match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
+ if match is not None:
+ return '-'.join(match.groups())
+ else:
+ return date_str
+
+
+class DateRange(object):
+ """Represents a time interval between two dates"""
+
+ def __init__(self, start=None, end=None):
+ """start and end must be strings in the format accepted by date"""
+ if start is not None:
+ self.start = date_from_str(start)
+ else:
+ self.start = datetime.datetime.min.date()
+ if end is not None:
+ self.end = date_from_str(end)
+ else:
+ self.end = datetime.datetime.max.date()
+ if self.start > self.end:
+ raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
+
+ @classmethod
+ def day(cls, day):
+ """Returns a range that only contains the given day"""
+ return cls(day, day)
+
+ def __contains__(self, date):
+ """Check if the date is in the range"""
+ if not isinstance(date, datetime.date):
+ date = date_from_str(date)
+ return self.start <= date <= self.end
+
+ def __str__(self):
+ return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
+
+
+def platform_name():
+ """ Returns the platform name as a compat_str """
+ res = platform.platform()
+ if isinstance(res, bytes):
+ res = res.decode(preferredencoding())
+
+ assert isinstance(res, compat_str)
+ return res
+
+
+def _windows_write_string(s, out):
+ """ Returns True if the string was written using special methods,
+ False if it has yet to be written out."""
+ # Adapted from http://stackoverflow.com/a/3259271/35070
+
+ import ctypes
+ import ctypes.wintypes
+
+ WIN_OUTPUT_IDS = {
+ 1: -11,
+ 2: -12,
+ }
+
+ try:
+ fileno = out.fileno()
+ except AttributeError:
+ # If the output stream doesn't have a fileno, it's virtual
+ return False
+ except io.UnsupportedOperation:
+ # Some strange Windows pseudo files?
+ return False
+ if fileno not in WIN_OUTPUT_IDS:
+ return False
+
+ GetStdHandle = compat_ctypes_WINFUNCTYPE(
+ ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
+ ('GetStdHandle', ctypes.windll.kernel32))
+ h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
+
+ WriteConsoleW = compat_ctypes_WINFUNCTYPE(
+ ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
+ ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
+ ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
+ written = ctypes.wintypes.DWORD(0)
+
+ GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
+ FILE_TYPE_CHAR = 0x0002
+ FILE_TYPE_REMOTE = 0x8000
+ GetConsoleMode = compat_ctypes_WINFUNCTYPE(
+ ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
+ ctypes.POINTER(ctypes.wintypes.DWORD))(
+ ('GetConsoleMode', ctypes.windll.kernel32))
+ INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
+
+ def not_a_console(handle):
+ if handle == INVALID_HANDLE_VALUE or handle is None:
+ return True
+ return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
+ or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
+
+ if not_a_console(h):
+ return False
+
+ def next_nonbmp_pos(s):
+ try:
+ return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
+ except StopIteration:
+ return len(s)
+
+ while s:
+ count = min(next_nonbmp_pos(s), 1024)
+
+ ret = WriteConsoleW(
+ h, s, count if count else 2, ctypes.byref(written), None)
+ if ret == 0:
+ raise OSError('Failed to write string')
+ if not count: # We just wrote a non-BMP character
+ assert written.value == 2
+ s = s[1:]
+ else:
+ assert written.value > 0
+ s = s[written.value:]
+ return True
+
+
+def write_string(s, out=None, encoding=None):
+ if out is None:
+ out = sys.stderr
+ assert type(s) == compat_str
+
+ if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
+ if _windows_write_string(s, out):
+ return
+
+ if ('b' in getattr(out, 'mode', '')
+ or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
+ byt = s.encode(encoding or preferredencoding(), 'ignore')
+ out.write(byt)
+ elif hasattr(out, 'buffer'):
+ enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
+ byt = s.encode(enc, 'ignore')
+ out.buffer.write(byt)
+ else:
+ out.write(s)
+ out.flush()
+
+
+def bytes_to_intlist(bs):
+ if not bs:
+ return []
+ if isinstance(bs[0], int): # Python 3
+ return list(bs)
+ else:
+ return [ord(c) for c in bs]
+
+
+def intlist_to_bytes(xs):
+ if not xs:
+ return b''
+ return compat_struct_pack('%dB' % len(xs), *xs)
+
+
+# Cross-platform file locking
+if sys.platform == 'win32':
+ import ctypes.wintypes
+ import msvcrt
+
+ class OVERLAPPED(ctypes.Structure):
+ _fields_ = [
+ ('Internal', ctypes.wintypes.LPVOID),
+ ('InternalHigh', ctypes.wintypes.LPVOID),
+ ('Offset', ctypes.wintypes.DWORD),
+ ('OffsetHigh', ctypes.wintypes.DWORD),
+ ('hEvent', ctypes.wintypes.HANDLE),
+ ]
+
+ kernel32 = ctypes.windll.kernel32
+ LockFileEx = kernel32.LockFileEx
+ LockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwFlags
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ LockFileEx.restype = ctypes.wintypes.BOOL
+ UnlockFileEx = kernel32.UnlockFileEx
+ UnlockFileEx.argtypes = [
+ ctypes.wintypes.HANDLE, # hFile
+ ctypes.wintypes.DWORD, # dwReserved
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
+ ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
+ ctypes.POINTER(OVERLAPPED) # Overlapped
+ ]
+ UnlockFileEx.restype = ctypes.wintypes.BOOL
+ whole_low = 0xffffffff
+ whole_high = 0x7fffffff
+
+ def _lock_file(f, exclusive):
+ overlapped = OVERLAPPED()
+ overlapped.Offset = 0
+ overlapped.OffsetHigh = 0
+ overlapped.hEvent = 0
+ f._lock_file_overlapped_p = ctypes.pointer(overlapped)
+ handle = msvcrt.get_osfhandle(f.fileno())
+ if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
+ whole_low, whole_high, f._lock_file_overlapped_p):
+ raise OSError('Locking file failed: %r' % ctypes.FormatError())
+
+ def _unlock_file(f):
+ assert f._lock_file_overlapped_p
+ handle = msvcrt.get_osfhandle(f.fileno())
+ if not UnlockFileEx(handle, 0,
+ whole_low, whole_high, f._lock_file_overlapped_p):
+ raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
+
+else:
+ # Some platforms, such as Jython, are missing fcntl
+ try:
+ import fcntl
+
+ def _lock_file(f, exclusive):
+ fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
+
+ def _unlock_file(f):
+ fcntl.flock(f, fcntl.LOCK_UN)
+ except ImportError:
+ UNSUPPORTED_MSG = 'file locking is not supported on this platform'
+
+ def _lock_file(f, exclusive):
+ raise IOError(UNSUPPORTED_MSG)
+
+ def _unlock_file(f):
+ raise IOError(UNSUPPORTED_MSG)
+
+
+class locked_file(object):
+ def __init__(self, filename, mode, encoding=None):
+ assert mode in ['r', 'a', 'w']
+ self.f = io.open(filename, mode, encoding=encoding)
+ self.mode = mode
+
+ def __enter__(self):
+ exclusive = self.mode != 'r'
+ try:
+ _lock_file(self.f, exclusive)
+ except IOError:
+ self.f.close()
+ raise
+ return self
+
+ def __exit__(self, etype, value, traceback):
+ try:
+ _unlock_file(self.f)
+ finally:
+ self.f.close()
+
+ def __iter__(self):
+ return iter(self.f)
+
+ def write(self, *args):
+ return self.f.write(*args)
+
+ def read(self, *args):
+ return self.f.read(*args)
+
+
+def get_filesystem_encoding():
+ encoding = sys.getfilesystemencoding()
+ return encoding if encoding is not None else 'utf-8'
+
+
+def shell_quote(args):
+ quoted_args = []
+ encoding = get_filesystem_encoding()
+ for a in args:
+ if isinstance(a, bytes):
+ # We may get a filename encoded with 'encodeFilename'
+ a = a.decode(encoding)
+ quoted_args.append(compat_shlex_quote(a))
+ return ' '.join(quoted_args)
+
+
+def smuggle_url(url, data):
+ """ Pass additional data in a URL for internal use. """
+
+ url, idata = unsmuggle_url(url, {})
+ data.update(idata)
+ sdata = compat_urllib_parse_urlencode(
+ {'__youtubedl_smuggle': json.dumps(data)})
+ return url + '#' + sdata
+
+
+def unsmuggle_url(smug_url, default=None):
+ if '#__youtubedl_smuggle' not in smug_url:
+ return smug_url, default
+ url, _, sdata = smug_url.rpartition('#')
+ jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
+ data = json.loads(jsond)
+ return url, data
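+
+ # Illustrative usage (sketch; the data dict is arbitrary):
+ #   url = smuggle_url('http://example.com/v', {'referer': 'http://host/'})
+ #   unsmuggle_url(url) -> ('http://example.com/v', {'referer': 'http://host/'})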
+
+
+def format_bytes(bytes):
+ if bytes is None:
+ return 'N/A'
+ if type(bytes) is str:
+ bytes = float(bytes)
+ if bytes == 0.0:
+ exponent = 0
+ else:
+ exponent = int(math.log(bytes, 1024.0))
+ suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
+ converted = float(bytes) / float(1024 ** exponent)
+ return '%.2f%s' % (converted, suffix)
+
+
+def lookup_unit_table(unit_table, s):
+ units_re = '|'.join(re.escape(u) for u in unit_table)
+ m = re.match(
+ r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
+ if not m:
+ return None
+ num_str = m.group('num').replace(',', '.')
+ mult = unit_table[m.group('unit')]
+ return int(float(num_str) * mult)
+
+
+def parse_filesize(s):
+ if s is None:
+ return None
+
+ # The lower-case forms are of course incorrect and unofficial,
+ # but we support those too
+ _UNIT_TABLE = {
+ 'B': 1,
+ 'b': 1,
+ 'bytes': 1,
+ 'KiB': 1024,
+ 'KB': 1000,
+ 'kB': 1024,
+ 'Kb': 1000,
+ 'kb': 1000,
+ 'kilobytes': 1000,
+ 'kibibytes': 1024,
+ 'MiB': 1024 ** 2,
+ 'MB': 1000 ** 2,
+ 'mB': 1024 ** 2,
+ 'Mb': 1000 ** 2,
+ 'mb': 1000 ** 2,
+ 'megabytes': 1000 ** 2,
+ 'mebibytes': 1024 ** 2,
+ 'GiB': 1024 ** 3,
+ 'GB': 1000 ** 3,
+ 'gB': 1024 ** 3,
+ 'Gb': 1000 ** 3,
+ 'gb': 1000 ** 3,
+ 'gigabytes': 1000 ** 3,
+ 'gibibytes': 1024 ** 3,
+ 'TiB': 1024 ** 4,
+ 'TB': 1000 ** 4,
+ 'tB': 1024 ** 4,
+ 'Tb': 1000 ** 4,
+ 'tb': 1000 ** 4,
+ 'terabytes': 1000 ** 4,
+ 'tebibytes': 1024 ** 4,
+ 'PiB': 1024 ** 5,
+ 'PB': 1000 ** 5,
+ 'pB': 1024 ** 5,
+ 'Pb': 1000 ** 5,
+ 'pb': 1000 ** 5,
+ 'petabytes': 1000 ** 5,
+ 'pebibytes': 1024 ** 5,
+ 'EiB': 1024 ** 6,
+ 'EB': 1000 ** 6,
+ 'eB': 1024 ** 6,
+ 'Eb': 1000 ** 6,
+ 'eb': 1000 ** 6,
+ 'exabytes': 1000 ** 6,
+ 'exbibytes': 1024 ** 6,
+ 'ZiB': 1024 ** 7,
+ 'ZB': 1000 ** 7,
+ 'zB': 1024 ** 7,
+ 'Zb': 1000 ** 7,
+ 'zb': 1000 ** 7,
+ 'zettabytes': 1000 ** 7,
+ 'zebibytes': 1024 ** 7,
+ 'YiB': 1024 ** 8,
+ 'YB': 1000 ** 8,
+ 'yB': 1024 ** 8,
+ 'Yb': 1000 ** 8,
+ 'yb': 1000 ** 8,
+ 'yottabytes': 1000 ** 8,
+ 'yobibytes': 1024 ** 8,
+ }
+
+ return lookup_unit_table(_UNIT_TABLE, s)
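+
+ # Illustrative usage (sketch):
+ #   parse_filesize('1.5 MiB') -> 1572864      (binary multiple)
+ #   parse_filesize('10 GB')   -> 10000000000  (decimal multiple)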
+
+
+def parse_count(s):
+ if s is None:
+ return None
+
+ s = s.strip()
+
+ if re.match(r'^[\d,.]+$', s):
+ return str_to_int(s)
+
+ _UNIT_TABLE = {
+ 'k': 1000,
+ 'K': 1000,
+ 'm': 1000 ** 2,
+ 'M': 1000 ** 2,
+ 'kk': 1000 ** 2,
+ 'KK': 1000 ** 2,
+ }
+
+ return lookup_unit_table(_UNIT_TABLE, s)
+
+
+def parse_resolution(s):
+ if s is None:
+ return {}
+
+ mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
+ if mobj:
+ return {
+ 'width': int(mobj.group('w')),
+ 'height': int(mobj.group('h')),
+ }
+
+ mobj = re.search(r'\b(\d+)[pPiI]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1))}
+
+ mobj = re.search(r'\b([48])[kK]\b', s)
+ if mobj:
+ return {'height': int(mobj.group(1)) * 540}
+
+ return {}
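+
+ # Illustrative usage (sketch):
+ #   parse_resolution('1920x1080') -> {'width': 1920, 'height': 1080}
+ #   parse_resolution('720p')      -> {'height': 720}
+ #   parse_resolution('4K')        -> {'height': 2160}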
+
+
+def parse_bitrate(s):
+ if not isinstance(s, compat_str):
+ return
+ mobj = re.search(r'\b(\d+)\s*kbps', s)
+ if mobj:
+ return int(mobj.group(1))
+
+
+def month_by_name(name, lang='en'):
+ """ Return the number of a month by (locale-independently) English name """
+
+ month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
+
+ try:
+ return month_names.index(name) + 1
+ except ValueError:
+ return None
+
+
+def month_by_abbreviation(abbrev):
+ """ Return the number of a month by (locale-independently) English
+ abbreviations """
+
+ try:
+ return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
+ except ValueError:
+ return None
+
+
+def fix_xml_ampersands(xml_str):
+ """Replace all the '&' by '&amp;' in XML"""
+ return re.sub(
+ r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
+ '&amp;',
+ xml_str)
+
+
+def setproctitle(title):
+ assert isinstance(title, compat_str)
+
+ # ctypes in Jython is not complete
+ # http://bugs.jython.org/issue2148
+ if sys.platform.startswith('java'):
+ return
+
+ try:
+ libc = ctypes.cdll.LoadLibrary('libc.so.6')
+ except OSError:
+ return
+ except TypeError:
+ # LoadLibrary in Windows Python 2.7.13 only expects
+ # a bytestring, but since unicode_literals turns
+ # every string into a unicode string, it fails.
+ return
+ title_bytes = title.encode('utf-8')
+ buf = ctypes.create_string_buffer(len(title_bytes))
+ buf.value = title_bytes
+ try:
+ libc.prctl(15, buf, 0, 0, 0)
+ except AttributeError:
+ return # Strange libc, just skip this
+
+
+def remove_start(s, start):
+ return s[len(start):] if s is not None and s.startswith(start) else s
+
+
+def remove_end(s, end):
+ return s[:-len(end)] if s is not None and s.endswith(end) else s
+
+
+def remove_quotes(s):
+ if s is None or len(s) < 2:
+ return s
+ for quote in ('"', "'", ):
+ if s[0] == quote and s[-1] == quote:
+ return s[1:-1]
+ return s
+
+
+def url_basename(url):
+ path = compat_urlparse.urlparse(url).path
+ return path.strip('/').split('/')[-1]
+
+
+def base_url(url):
+ return re.match(r'https?://[^?#&]+/', url).group()
+
+
+def urljoin(base, path):
+ if isinstance(path, bytes):
+ path = path.decode('utf-8')
+ if not isinstance(path, compat_str) or not path:
+ return None
+ if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
+ return path
+ if isinstance(base, bytes):
+ base = base.decode('utf-8')
+ if not isinstance(base, compat_str) or not re.match(
+ r'^(?:https?:)?//', base):
+ return None
+ return compat_urlparse.urljoin(base, path)
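+
+ # Illustrative usage (sketch; hosts are made up):
+ #   urljoin('https://example.com/a/', 'b.mp4')  -> 'https://example.com/a/b.mp4'
+ #   urljoin('https://example.com/a/', '//cdn.example.com/b.mp4')
+ #   -> '//cdn.example.com/b.mp4' (already absolute, returned as is)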
+
+
+class HEADRequest(compat_urllib_request.Request):
+ def get_method(self):
+ return 'HEAD'
+
+
+class PUTRequest(compat_urllib_request.Request):
+ def get_method(self):
+ return 'PUT'
+
+
+def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
+ if get_attr:
+ if v is not None:
+ v = getattr(v, get_attr, None)
+ if v == '':
+ v = None
+ if v is None:
+ return default
+ try:
+ return int(v) * invscale // scale
+ except (ValueError, TypeError):
+ return default
+
+
+def str_or_none(v, default=None):
+ return default if v is None else compat_str(v)
+
+
+def str_to_int(int_str):
+ """ A more relaxed version of int_or_none """
+ if isinstance(int_str, compat_integer_types):
+ return int_str
+ elif isinstance(int_str, compat_str):
+ int_str = re.sub(r'[,\.\+]', '', int_str)
+ return int_or_none(int_str)
+
+
+def float_or_none(v, scale=1, invscale=1, default=None):
+ if v is None:
+ return default
+ try:
+ return float(v) * invscale / scale
+ except (ValueError, TypeError):
+ return default
+
+
+def bool_or_none(v, default=None):
+ return v if isinstance(v, bool) else default
+
+
+def strip_or_none(v, default=None):
+ return v.strip() if isinstance(v, compat_str) else default
+
+
+def url_or_none(url):
+ if not url or not isinstance(url, compat_str):
+ return None
+ url = url.strip()
+ return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
+
+
+def parse_duration(s):
+ if not isinstance(s, compat_basestring):
+ return None
+
+ s = s.strip()
+
+ days, hours, mins, secs, ms = [None] * 5
+ m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
+ if m:
+ days, hours, mins, secs, ms = m.groups()
+ else:
+ m = re.match(
+ r'''(?ix)(?:P?
+ (?:
+ [0-9]+\s*y(?:ears?)?\s*
+ )?
+ (?:
+ [0-9]+\s*m(?:onths?)?\s*
+ )?
+ (?:
+ [0-9]+\s*w(?:eeks?)?\s*
+ )?
+ (?:
+ (?P<days>[0-9]+)\s*d(?:ays?)?\s*
+ )?
+ T)?
+ (?:
+ (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
+ )?
+ (?:
+ (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
+ )?
+ (?:
+ (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
+ )?Z?$''', s)
+ if m:
+ days, hours, mins, secs, ms = m.groups()
+ else:
+ m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
+ if m:
+ hours, mins = m.groups()
+ else:
+ return None
+
+ duration = 0
+ if secs:
+ duration += float(secs)
+ if mins:
+ duration += float(mins) * 60
+ if hours:
+ duration += float(hours) * 60 * 60
+ if days:
+ duration += float(days) * 24 * 60 * 60
+ if ms:
+ duration += float(ms)
+ return duration
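+
+ # Illustrative usage (sketch):
+ #   parse_duration('1:02:30')    -> 3750.0
+ #   parse_duration('PT1H30M')    -> 5400.0
+ #   parse_duration('9 min 30 s') -> 570.0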
+
+
+def prepend_extension(filename, ext, expected_real_ext=None):
+ name, real_ext = os.path.splitext(filename)
+ return (
+ '{0}.{1}{2}'.format(name, ext, real_ext)
+ if not expected_real_ext or real_ext[1:] == expected_real_ext
+ else '{0}.{1}'.format(filename, ext))
+
+
+def replace_extension(filename, ext, expected_real_ext=None):
+ name, real_ext = os.path.splitext(filename)
+ return '{0}.{1}'.format(
+ name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
+ ext)
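+
+ # Illustrative usage (sketch):
+ #   prepend_extension('video.mp4', 'f137') -> 'video.f137.mp4'
+ #   replace_extension('video.mp4', 'mp3')  -> 'video.mp3'
+ #   prepend_extension('video.mp4', 'temp', expected_real_ext='webm')
+ #   -> 'video.mp4.temp' (extension mismatch, so append instead)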
+
+
+def check_executable(exe, args=[]):
+ """ Checks if the given binary is installed somewhere in PATH, and returns its name.
+ args can be a list of arguments for a short output (like -version) """
+ try:
+ subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+ except OSError:
+ return False
+ return exe
+
+
+def get_exe_version(exe, args=['--version'],
+ version_re=None, unrecognized='present'):
+ """ Returns the version of the specified executable,
+ or False if the executable is not present """
+ try:
+ # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
+ # SIGTTOU if youtube-dlc is run in the background.
+ # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
+ out, _ = subprocess.Popen(
+ [encodeArgument(exe)] + args,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
+ except OSError:
+ return False
+ if isinstance(out, bytes): # Python 2.x
+ out = out.decode('ascii', 'ignore')
+ return detect_exe_version(out, version_re, unrecognized)
+
+
+def detect_exe_version(output, version_re=None, unrecognized='present'):
+ assert isinstance(output, compat_str)
+ if version_re is None:
+ version_re = r'version\s+([-0-9._a-zA-Z]+)'
+ m = re.search(version_re, output)
+ if m:
+ return m.group(1)
+ else:
+ return unrecognized
+
+
+class PagedList(object):
+ def __len__(self):
+ # This is only useful for tests
+ return len(self.getslice())
+
+
+class OnDemandPagedList(PagedList):
+ def __init__(self, pagefunc, pagesize, use_cache=True):
+ self._pagefunc = pagefunc
+ self._pagesize = pagesize
+ self._use_cache = use_cache
+ if use_cache:
+ self._cache = {}
+
+ def getslice(self, start=0, end=None):
+ res = []
+ for pagenum in itertools.count(start // self._pagesize):
+ firstid = pagenum * self._pagesize
+ nextfirstid = pagenum * self._pagesize + self._pagesize
+ if start >= nextfirstid:
+ continue
+
+ page_results = None
+ if self._use_cache:
+ page_results = self._cache.get(pagenum)
+ if page_results is None:
+ page_results = list(self._pagefunc(pagenum))
+ if self._use_cache:
+ self._cache[pagenum] = page_results
+
+ startv = (
+ start % self._pagesize
+ if firstid <= start < nextfirstid
+ else 0)
+
+ endv = (
+ ((end - 1) % self._pagesize) + 1
+ if (end is not None and firstid <= end <= nextfirstid)
+ else None)
+
+ if startv != 0 or endv is not None:
+ page_results = page_results[startv:endv]
+ res.extend(page_results)
+
+ # A little optimization: if the current page is not "full", i.e. does
+ # not contain page_size videos, then we can assume that this page
+ # is the last one - there are no more ids on further pages -
+ # so there is no need to query again.
+ if len(page_results) + startv < self._pagesize:
+ break
+
+ # If we got the whole page, but the next page is not interesting,
+ # break out early as well
+ if end == nextfirstid:
+ break
+ return res
+
+
+class InAdvancePagedList(PagedList):
+ def __init__(self, pagefunc, pagecount, pagesize):
+ self._pagefunc = pagefunc
+ self._pagecount = pagecount
+ self._pagesize = pagesize
+
+ def getslice(self, start=0, end=None):
+ res = []
+ start_page = start // self._pagesize
+ end_page = (
+ self._pagecount if end is None else (end // self._pagesize + 1))
+ skip_elems = start - start_page * self._pagesize
+ only_more = None if end is None else end - start
+ for pagenum in range(start_page, end_page):
+ page = list(self._pagefunc(pagenum))
+ if skip_elems:
+ page = page[skip_elems:]
+ skip_elems = None
+ if only_more is not None:
+ if len(page) < only_more:
+ only_more -= len(page)
+ else:
+ page = page[:only_more]
+ res.extend(page)
+ break
+ res.extend(page)
+ return res
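+
+ # Illustrative usage (sketch with a tiny in-memory pager; data is made up):
+ #   pages = [['a', 'b'], ['c']]
+ #   pl = OnDemandPagedList(lambda n: pages[n], 2)
+ #   pl.getslice(1, 3) -> ['b', 'c']
+ # Pages are fetched lazily and, by default, cached per page number.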
+
+
+def uppercase_escape(s):
+ unicode_escape = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\U[0-9a-fA-F]{8}',
+ lambda m: unicode_escape(m.group(0))[0],
+ s)
+
+
+def lowercase_escape(s):
+ unicode_escape = codecs.getdecoder('unicode_escape')
+ return re.sub(
+ r'\\u[0-9a-fA-F]{4}',
+ lambda m: unicode_escape(m.group(0))[0],
+ s)
+
+
+def escape_rfc3986(s):
+ """Escape non-ASCII characters as suggested by RFC 3986"""
+ if sys.version_info < (3, 0) and isinstance(s, compat_str):
+ s = s.encode('utf-8')
+ return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
+
+
+def escape_url(url):
+ """Escape URL as suggested by RFC 3986"""
+ url_parsed = compat_urllib_parse_urlparse(url)
+ return url_parsed._replace(
+ netloc=url_parsed.netloc.encode('idna').decode('ascii'),
+ path=escape_rfc3986(url_parsed.path),
+ params=escape_rfc3986(url_parsed.params),
+ query=escape_rfc3986(url_parsed.query),
+ fragment=escape_rfc3986(url_parsed.fragment)
+ ).geturl()
+
+
+def read_batch_urls(batch_fd):
+ def fixup(url):
+ if not isinstance(url, compat_str):
+ url = url.decode('utf-8', 'replace')
+ BOM_UTF8 = '\xef\xbb\xbf'
+ if url.startswith(BOM_UTF8):
+ url = url[len(BOM_UTF8):]
+ url = url.strip()
+ if url.startswith(('#', ';', ']')):
+ return False
+ return url
+
+ with contextlib.closing(batch_fd) as fd:
+ return [url for url in map(fixup, fd) if url]
+
+
+def urlencode_postdata(*args, **kargs):
+ return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
+
+
+def update_url_query(url, query):
+ if not query:
+ return url
+ parsed_url = compat_urlparse.urlparse(url)
+ qs = compat_parse_qs(parsed_url.query)
+ qs.update(query)
+ return compat_urlparse.urlunparse(parsed_url._replace(
+ query=compat_urllib_parse_urlencode(qs, True)))
+
+
+def update_Request(req, url=None, data=None, headers={}, query={}):
+ req_headers = req.headers.copy()
+ req_headers.update(headers)
+ req_data = data or req.data
+ req_url = update_url_query(url or req.get_full_url(), query)
+ req_get_method = req.get_method()
+ if req_get_method == 'HEAD':
+ req_type = HEADRequest
+ elif req_get_method == 'PUT':
+ req_type = PUTRequest
+ else:
+ req_type = compat_urllib_request.Request
+ new_req = req_type(
+ req_url, data=req_data, headers=req_headers,
+ origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
+ if hasattr(req, 'timeout'):
+ new_req.timeout = req.timeout
+ return new_req
+
+
+def _multipart_encode_impl(data, boundary):
+ content_type = 'multipart/form-data; boundary=%s' % boundary
+
+ out = b''
+ for k, v in data.items():
+ out += b'--' + boundary.encode('ascii') + b'\r\n'
+ if isinstance(k, compat_str):
+ k = k.encode('utf-8')
+ if isinstance(v, compat_str):
+ v = v.encode('utf-8')
+ # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
+ # suggests sending UTF-8 directly. Firefox sends UTF-8, too
+ content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
+ if boundary.encode('ascii') in content:
+ raise ValueError('Boundary overlaps with data')
+ out += content
+
+ out += b'--' + boundary.encode('ascii') + b'--\r\n'
+
+ return out, content_type
+
+
+def multipart_encode(data, boundary=None):
+ '''
+ Encode a dict to RFC 7578-compliant form-data
+
+ data:
+ A dict where keys and values can be either Unicode or bytes-like
+ objects.
+ boundary:
+ If specified, it must be a Unicode object, which is then used as the
+ boundary. Otherwise a random boundary is generated.
+
+ Reference: https://tools.ietf.org/html/rfc7578
+ '''
+ has_specified_boundary = boundary is not None
+
+ while True:
+ if boundary is None:
+ boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
+
+ try:
+ out, content_type = _multipart_encode_impl(data, boundary)
+ break
+ except ValueError:
+ if has_specified_boundary:
+ raise
+ boundary = None
+
+ return out, content_type
+
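+ # A small usage sketch with a fixed boundary (field name and value are
+ # made up):
+ #
+ #   >>> body, ctype = multipart_encode({'field': 'value'}, boundary='XXX')
+ #   >>> ctype
+ #   'multipart/form-data; boundary=XXX'
+ #   >>> body == (b'--XXX\r\nContent-Disposition: form-data; name="field"'
+ #   ...          b'\r\n\r\nvalue\r\n--XXX--\r\n')
+ #   True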
+
+def dict_get(d, key_or_keys, default=None, skip_false_values=True):
+ if isinstance(key_or_keys, (list, tuple)):
+ for key in key_or_keys:
+ if key not in d or d[key] is None or skip_false_values and not d[key]:
+ continue
+ return d[key]
+ return default
+ return d.get(key_or_keys, default)
+
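+ # By default falsy values are skipped, so the first truthy key wins:
+ #
+ #   >>> dict_get({'a': '', 'b': 'x'}, ('a', 'b'))
+ #   'x'
+ #   >>> dict_get({'a': ''}, ('a', 'b'), skip_false_values=False)
+ #   ''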
+
+def try_get(src, getter, expected_type=None):
+ if not isinstance(getter, (list, tuple)):
+ getter = [getter]
+ for get in getter:
+ try:
+ v = get(src)
+ except (AttributeError, KeyError, TypeError, IndexError):
+ pass
+ else:
+ if expected_type is None or isinstance(v, expected_type):
+ return v
+
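+ # try_get swallows the usual lookup errors, so chained lookups degrade
+ # to None instead of raising:
+ #
+ #   >>> try_get({'a': [{'b': 1}]}, lambda x: x['a'][0]['b'], int)
+ #   1
+ #   >>> try_get({}, lambda x: x['a'][0]['b'], int) is None
+ #   True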
+
+def merge_dicts(*dicts):
+ merged = {}
+ for a_dict in dicts:
+ for k, v in a_dict.items():
+ if v is None:
+ continue
+ if (k not in merged
+ or (isinstance(v, compat_str) and v
+ and isinstance(merged[k], compat_str)
+ and not merged[k])):
+ merged[k] = v
+ return merged
+
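+ # Earlier dicts win, except that an empty string is replaced by a later
+ # non-empty one:
+ #
+ #   >>> merge_dicts({'a': 1, 'b': ''}, {'a': 2, 'b': 'x'}) == {'a': 1, 'b': 'x'}
+ #   True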
+
+def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
+ return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
+
+
+US_RATINGS = {
+ 'G': 0,
+ 'PG': 10,
+ 'PG-13': 13,
+ 'R': 16,
+ 'NC': 18,
+}
+
+
+TV_PARENTAL_GUIDELINES = {
+ 'TV-Y': 0,
+ 'TV-Y7': 7,
+ 'TV-G': 0,
+ 'TV-PG': 0,
+ 'TV-14': 14,
+ 'TV-MA': 17,
+}
+
+
+def parse_age_limit(s):
+ if type(s) == int:
+ return s if 0 <= s <= 21 else None
+ if not isinstance(s, compat_basestring):
+ return None
+ m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
+ if m:
+ return int(m.group('age'))
+ if s in US_RATINGS:
+ return US_RATINGS[s]
+ m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
+ if m:
+ return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
+ return None
+
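+ # Accepts plain ages as well as US movie and TV ratings, e.g.:
+ #
+ #   >>> parse_age_limit('18+'), parse_age_limit('PG-13'), parse_age_limit('TV-MA')
+ #   (18, 13, 17)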
+
+def strip_jsonp(code):
+ return re.sub(
+ r'''(?sx)^
+ (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
+ (?:\s*&&\s*(?P=func_name))?
+ \s*\(\s*(?P<callback_data>.*)\);?
+ \s*?(?://[^\n]*)*$''',
+ r'\g<callback_data>', code)
+
+
+def js_to_json(code):
+ COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
+ SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
+ INTEGER_TABLE = (
+ (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
+ (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
+ )
+
+ def fix_kv(m):
+ v = m.group(0)
+ if v in ('true', 'false', 'null'):
+ return v
+ elif v.startswith('/*') or v.startswith('//') or v == ',':
+ return ""
+
+ if v[0] in ("'", '"'):
+ v = re.sub(r'(?s)\\.|"', lambda m: {
+ '"': '\\"',
+ "\\'": "'",
+ '\\\n': '',
+ '\\x': '\\u00',
+ }.get(m.group(0), m.group(0)), v[1:-1])
+
+ for regex, base in INTEGER_TABLE:
+ im = re.match(regex, v)
+ if im:
+ i = int(im.group(1), base)
+ return '"%d":' % i if v.endswith(':') else '%d' % i
+
+ return '"%s"' % v
+
+ return re.sub(r'''(?sx)
+ "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
+ '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
+ {comment}|,(?={skip}[\]}}])|
+ (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
+ \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
+ [0-9]+(?={skip}:)
+ '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
+
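+ # A small sketch of what gets fixed up: unquoted keys, single-quoted
+ # strings, hex literals and trailing commas all become valid JSON:
+ #
+ #   >>> js_to_json("{foo: 'bar', baz: 0x10,}")
+ #   '{"foo": "bar", "baz": 16}'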
+
+def qualities(quality_ids):
+ """ Get a numeric quality value out of a list of possible values """
+ def q(qid):
+ try:
+ return quality_ids.index(qid)
+ except ValueError:
+ return -1
+ return q
+
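+ # For example, with an ordered list of format ids (lowest first):
+ #
+ #   >>> q = qualities(['240p', '480p', '720p'])
+ #   >>> q('480p'), q('4k')
+ #   (1, -1)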
+
+DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
+
+
+def limit_length(s, length):
+ """ Add ellipses to overly long strings """
+ if s is None:
+ return None
+ ELLIPSES = '...'
+ if len(s) > length:
+ return s[:length - len(ELLIPSES)] + ELLIPSES
+ return s
+
+
+def version_tuple(v):
+ return tuple(int(e) for e in re.split(r'[-.]', v))
+
+
+def is_outdated_version(version, limit, assume_new=True):
+ if not version:
+ return not assume_new
+ try:
+ return version_tuple(version) < version_tuple(limit)
+ except ValueError:
+ return not assume_new
+
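+ # For example:
+ #
+ #   >>> is_outdated_version('2020.01.01', '2020.09.03')
+ #   True
+ #   >>> is_outdated_version('', '2020.09.03')  # unknown version, assumed new
+ #   False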
+
+def ytdl_is_updateable():
+ """ Returns if youtube-dlc can be updated with -U """
+ from zipimport import zipimporter
+
+ return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
+
+
+def args_to_str(args):
+ # Get a short string representation for a subprocess command
+ return ' '.join(compat_shlex_quote(a) for a in args)
+
+
+def error_to_compat_str(err):
+ err_str = str(err)
+ # On Python 2 the error byte string must be decoded with the proper
+ # encoding rather than ASCII
+ if sys.version_info[0] < 3:
+ err_str = err_str.decode(preferredencoding())
+ return err_str
+
+
+def mimetype2ext(mt):
+ if mt is None:
+ return None
+
+ ext = {
+ 'audio/mp4': 'm4a',
+ # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. We use .mp3 here
+ # as it's the most popular one
+ 'audio/mpeg': 'mp3',
+ 'audio/x-wav': 'wav',
+ }.get(mt)
+ if ext is not None:
+ return ext
+
+ _, _, res = mt.rpartition('/')
+ res = res.split(';')[0].strip().lower()
+
+ return {
+ '3gpp': '3gp',
+ 'smptett+xml': 'tt',
+ 'ttaf+xml': 'dfxp',
+ 'ttml+xml': 'ttml',
+ 'x-flv': 'flv',
+ 'x-mp4-fragmented': 'mp4',
+ 'x-ms-sami': 'sami',
+ 'x-ms-wmv': 'wmv',
+ 'mpegurl': 'm3u8',
+ 'x-mpegurl': 'm3u8',
+ 'vnd.apple.mpegurl': 'm3u8',
+ 'dash+xml': 'mpd',
+ 'f4m+xml': 'f4m',
+ 'hds+xml': 'f4m',
+ 'vnd.ms-sstr+xml': 'ism',
+ 'quicktime': 'mov',
+ 'mp2t': 'ts',
+ }.get(res, res)
+
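+ # For example, MIME parameters are ignored and known subtypes are mapped:
+ #
+ #   >>> mimetype2ext('application/vnd.apple.mpegurl')
+ #   'm3u8'
+ #   >>> mimetype2ext('video/MP2T; charset=utf-8')
+ #   'ts'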
+
+def parse_codecs(codecs_str):
+ # http://tools.ietf.org/html/rfc6381
+ if not codecs_str:
+ return {}
+ split_codecs = list(filter(None, map(
+ lambda s: s.strip(), codecs_str.strip().strip(',').split(','))))
+ vcodec, acodec = None, None
+ for full_codec in split_codecs:
+ codec = full_codec.split('.')[0]
+ if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
+ if not vcodec:
+ vcodec = full_codec
+ elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
+ if not acodec:
+ acodec = full_codec
+ else:
+ write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
+ if not vcodec and not acodec:
+ if len(split_codecs) == 2:
+ return {
+ 'vcodec': split_codecs[0],
+ 'acodec': split_codecs[1],
+ }
+ else:
+ return {
+ 'vcodec': vcodec or 'none',
+ 'acodec': acodec or 'none',
+ }
+ return {}
+
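+ # For example, for an RFC 6381 codecs string with one video and one
+ # audio entry:
+ #
+ #   >>> parse_codecs('avc1.64001f, mp4a.40.2') == {
+ #   ...     'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
+ #   True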
+
+def urlhandle_detect_ext(url_handle):
+ getheader = url_handle.headers.get
+
+ cd = getheader('Content-Disposition')
+ if cd:
+ m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
+ if m:
+ e = determine_ext(m.group('filename'), default_ext=None)
+ if e:
+ return e
+
+ return mimetype2ext(getheader('Content-Type'))
+
+
+def encode_data_uri(data, mime_type):
+ return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
+
+
+def age_restricted(content_limit, age_limit):
+ """ Returns True iff the content should be blocked """
+
+ if age_limit is None: # No limit set
+ return False
+ if content_limit is None:
+ return False # Content available for everyone
+ return age_limit < content_limit
+
+
+def is_html(first_bytes):
+ """ Detect whether a file contains HTML by examining its first bytes. """
+
+ BOMS = [
+ (b'\xef\xbb\xbf', 'utf-8'),
+ (b'\x00\x00\xfe\xff', 'utf-32-be'),
+ (b'\xff\xfe\x00\x00', 'utf-32-le'),
+ (b'\xff\xfe', 'utf-16-le'),
+ (b'\xfe\xff', 'utf-16-be'),
+ ]
+ for bom, enc in BOMS:
+ if first_bytes.startswith(bom):
+ s = first_bytes[len(bom):].decode(enc, 'replace')
+ break
+ else:
+ s = first_bytes.decode('utf-8', 'replace')
+
+ return re.match(r'^\s*<', s)
+
+
+def determine_protocol(info_dict):
+ protocol = info_dict.get('protocol')
+ if protocol is not None:
+ return protocol
+
+ url = info_dict['url']
+ if url.startswith('rtmp'):
+ return 'rtmp'
+ elif url.startswith('mms'):
+ return 'mms'
+ elif url.startswith('rtsp'):
+ return 'rtsp'
+
+ ext = determine_ext(url)
+ if ext == 'm3u8':
+ return 'm3u8'
+ elif ext == 'f4m':
+ return 'f4m'
+
+ return compat_urllib_parse_urlparse(url).scheme
+
+
+def render_table(header_row, data):
+ """ Render a list of rows, each as a list of values """
+ table = [header_row] + data
+ max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+ format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
+ return '\n'.join(format_str % tuple(row) for row in table)
+
+
+def _match_one(filter_part, dct):
+ COMPARISON_OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-z_]+)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?:
+ (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
+ (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
+ (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
+ )
+ \s*$
+ ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = COMPARISON_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ if (m.group('quotedstrval') is not None
+ or m.group('strval') is not None
+ # If the original field is a string and the matching comparison
+ # value is a number we should respect the origin of the original
+ # field and process the comparison value as a string (see
+ # https://github.com/ytdl-org/youtube-dl/issues/11082).
+ or actual_value is not None and m.group('intval') is not None
+ and isinstance(actual_value, compat_str)):
+ if m.group('op') not in ('=', '!='):
+ raise ValueError(
+ 'Operator %s does not support string values!' % m.group('op'))
+ comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
+ quote = m.group('quote')
+ if quote is not None:
+ comparison_value = comparison_value.replace(r'\%s' % quote, quote)
+ else:
+ try:
+ comparison_value = int(m.group('intval'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('intval'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('intval') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid integer value %r in filter part %r' % (
+ m.group('intval'), filter_part))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+
+ UNARY_OPERATORS = {
+ '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
+ '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
+ }
+ operator_rex = re.compile(r'''(?x)\s*
+ (?P<op>%s)\s*(?P<key>[a-z_]+)
+ \s*$
+ ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
+ m = operator_rex.search(filter_part)
+ if m:
+ op = UNARY_OPERATORS[m.group('op')]
+ actual_value = dct.get(m.group('key'))
+ return op(actual_value)
+
+ raise ValueError('Invalid filter part %r' % filter_part)
+
+
+def match_str(filter_str, dct):
+ """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
+
+ return all(
+ _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
+
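+ # Filter parts are ANDed together; a trailing '?' on the operator makes
+ # a comparison pass when the field is missing:
+ #
+ #   >>> match_str('like_count > 100 & dislike_count <? 50', {'like_count': 190})
+ #   True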
+
+def match_filter_func(filter_str):
+ def _match_func(info_dict):
+ if match_str(filter_str, info_dict):
+ return None
+ else:
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ return '%s does not pass filter %s, skipping ...' % (video_title, filter_str)
+ return _match_func
+
+
+def parse_dfxp_time_expr(time_expr):
+ if not time_expr:
+ return
+
+ mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
+ if mobj:
+ return float(mobj.group('time_offset'))
+
+ mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
+ if mobj:
+ return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
+
+
+def srt_subtitles_timecode(seconds):
+ return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
+
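+ # For example, a DFXP clock value converts to an SRT timecode like so:
+ #
+ #   >>> srt_subtitles_timecode(parse_dfxp_time_expr('01:02:03.5'))
+ #   '01:02:03,500'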
+
+def dfxp2srt(dfxp_data):
+ '''
+ @param dfxp_data A bytes-like object containing DFXP data
+ @returns A unicode object containing converted SRT data
+ '''
+ LEGACY_NAMESPACES = (
+ (b'http://www.w3.org/ns/ttml', [
+ b'http://www.w3.org/2004/11/ttaf1',
+ b'http://www.w3.org/2006/04/ttaf1',
+ b'http://www.w3.org/2006/10/ttaf1',
+ ]),
+ (b'http://www.w3.org/ns/ttml#styling', [
+ b'http://www.w3.org/ns/ttml#style',
+ ]),
+ )
+
+ SUPPORTED_STYLING = [
+ 'color',
+ 'fontFamily',
+ 'fontSize',
+ 'fontStyle',
+ 'fontWeight',
+ 'textDecoration'
+ ]
+
+ _x = functools.partial(xpath_with_ns, ns_map={
+ 'xml': 'http://www.w3.org/XML/1998/namespace',
+ 'ttml': 'http://www.w3.org/ns/ttml',
+ 'tts': 'http://www.w3.org/ns/ttml#styling',
+ })
+
+ styles = {}
+ default_style = {}
+
+ class TTMLPElementParser(object):
+ _out = ''
+ _unclosed_elements = []
+ _applied_styles = []
+
+ def start(self, tag, attrib):
+ if tag in (_x('ttml:br'), 'br'):
+ self._out += '\n'
+ else:
+ unclosed_elements = []
+ style = {}
+ element_style_id = attrib.get('style')
+ if default_style:
+ style.update(default_style)
+ if element_style_id:
+ style.update(styles.get(element_style_id, {}))
+ for prop in SUPPORTED_STYLING:
+ prop_val = attrib.get(_x('tts:' + prop))
+ if prop_val:
+ style[prop] = prop_val
+ if style:
+ font = ''
+ for k, v in sorted(style.items()):
+ if self._applied_styles and self._applied_styles[-1].get(k) == v:
+ continue
+ if k == 'color':
+ font += ' color="%s"' % v
+ elif k == 'fontSize':
+ font += ' size="%s"' % v
+ elif k == 'fontFamily':
+ font += ' face="%s"' % v
+ elif k == 'fontWeight' and v == 'bold':
+ self._out += '<b>'
+ unclosed_elements.append('b')
+ elif k == 'fontStyle' and v == 'italic':
+ self._out += '<i>'
+ unclosed_elements.append('i')
+ elif k == 'textDecoration' and v == 'underline':
+ self._out += '<u>'
+ unclosed_elements.append('u')
+ if font:
+ self._out += '<font' + font + '>'
+ unclosed_elements.append('font')
+ applied_style = {}
+ if self._applied_styles:
+ applied_style.update(self._applied_styles[-1])
+ applied_style.update(style)
+ self._applied_styles.append(applied_style)
+ self._unclosed_elements.append(unclosed_elements)
+
+ def end(self, tag):
+ if tag not in (_x('ttml:br'), 'br'):
+ unclosed_elements = self._unclosed_elements.pop()
+ for element in reversed(unclosed_elements):
+ self._out += '</%s>' % element
+ if unclosed_elements and self._applied_styles:
+ self._applied_styles.pop()
+
+ def data(self, data):
+ self._out += data
+
+ def close(self):
+ return self._out.strip()
+
+ def parse_node(node):
+ target = TTMLPElementParser()
+ parser = xml.etree.ElementTree.XMLParser(target=target)
+ parser.feed(xml.etree.ElementTree.tostring(node))
+ return parser.close()
+
+ for k, v in LEGACY_NAMESPACES:
+ for ns in v:
+ dfxp_data = dfxp_data.replace(ns, k)
+
+ dfxp = compat_etree_fromstring(dfxp_data)
+ out = []
+ paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
+
+ if not paras:
+ raise ValueError('Invalid dfxp/TTML subtitle')
+
+ repeat = False
+ while True:
+ for style in dfxp.findall(_x('.//ttml:style')):
+ style_id = style.get('id') or style.get(_x('xml:id'))
+ if not style_id:
+ continue
+ parent_style_id = style.get('style')
+ if parent_style_id:
+ if parent_style_id not in styles:
+ repeat = True
+ continue
+ styles[style_id] = styles[parent_style_id].copy()
+ for prop in SUPPORTED_STYLING:
+ prop_val = style.get(_x('tts:' + prop))
+ if prop_val:
+ styles.setdefault(style_id, {})[prop] = prop_val
+ if repeat:
+ repeat = False
+ else:
+ break
+
+ for p in ('body', 'div'):
+ ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
+ if ele is None:
+ continue
+ style = styles.get(ele.get('style'))
+ if not style:
+ continue
+ default_style.update(style)
+
+ for para, index in zip(paras, itertools.count(1)):
+ begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
+ end_time = parse_dfxp_time_expr(para.attrib.get('end'))
+ dur = parse_dfxp_time_expr(para.attrib.get('dur'))
+ if begin_time is None:
+ continue
+ if not end_time:
+ if not dur:
+ continue
+ end_time = begin_time + dur
+ out.append('%d\n%s --> %s\n%s\n\n' % (
+ index,
+ srt_subtitles_timecode(begin_time),
+ srt_subtitles_timecode(end_time),
+ parse_node(para)))
+
+ return ''.join(out)
+
+
+def cli_option(params, command_option, param):
+ param = params.get(param)
+ if param:
+ param = compat_str(param)
+ return [command_option, param] if param is not None else []
+
+
+def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
+ param = params.get(param)
+ if param is None:
+ return []
+ assert isinstance(param, bool)
+ if separator:
+ return [command_option + separator + (true_value if param else false_value)]
+ return [command_option, true_value if param else false_value]
+
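+ # These cli_* helpers translate youtube-dlc params into arguments for
+ # external programs, e.g. (option names are made up):
+ #
+ #   >>> cli_bool_option({'nocheckcertificate': True},
+ #   ...                 '--no-check-certificate', 'nocheckcertificate')
+ #   ['--no-check-certificate', 'true']
+ #   >>> cli_bool_option({'nocheckcertificate': True},
+ #   ...                 '--check-certificate', 'nocheckcertificate',
+ #   ...                 'false', 'true', '=')
+ #   ['--check-certificate=false']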
+
+def cli_valueless_option(params, command_option, param, expected_value=True):
+ param = params.get(param)
+ return [command_option] if param == expected_value else []
+
+
+def cli_configuration_args(params, param, default=[]):
+ ex_args = params.get(param)
+ if ex_args is None:
+ return default
+ assert isinstance(ex_args, list)
+ return ex_args
+
+
+class ISO639Utils(object):
+ # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
+ _lang_map = {
+ 'aa': 'aar',
+ 'ab': 'abk',
+ 'ae': 'ave',
+ 'af': 'afr',
+ 'ak': 'aka',
+ 'am': 'amh',
+ 'an': 'arg',
+ 'ar': 'ara',
+ 'as': 'asm',
+ 'av': 'ava',
+ 'ay': 'aym',
+ 'az': 'aze',
+ 'ba': 'bak',
+ 'be': 'bel',
+ 'bg': 'bul',
+ 'bh': 'bih',
+ 'bi': 'bis',
+ 'bm': 'bam',
+ 'bn': 'ben',
+ 'bo': 'bod',
+ 'br': 'bre',
+ 'bs': 'bos',
+ 'ca': 'cat',
+ 'ce': 'che',
+ 'ch': 'cha',
+ 'co': 'cos',
+ 'cr': 'cre',
+ 'cs': 'ces',
+ 'cu': 'chu',
+ 'cv': 'chv',
+ 'cy': 'cym',
+ 'da': 'dan',
+ 'de': 'deu',
+ 'dv': 'div',
+ 'dz': 'dzo',
+ 'ee': 'ewe',
+ 'el': 'ell',
+ 'en': 'eng',
+ 'eo': 'epo',
+ 'es': 'spa',
+ 'et': 'est',
+ 'eu': 'eus',
+ 'fa': 'fas',
+ 'ff': 'ful',
+ 'fi': 'fin',
+ 'fj': 'fij',
+ 'fo': 'fao',
+ 'fr': 'fra',
+ 'fy': 'fry',
+ 'ga': 'gle',
+ 'gd': 'gla',
+ 'gl': 'glg',
+ 'gn': 'grn',
+ 'gu': 'guj',
+ 'gv': 'glv',
+ 'ha': 'hau',
+ 'he': 'heb',
+ 'iw': 'heb', # Replaced by he in 1989 revision
+ 'hi': 'hin',
+ 'ho': 'hmo',
+ 'hr': 'hrv',
+ 'ht': 'hat',
+ 'hu': 'hun',
+ 'hy': 'hye',
+ 'hz': 'her',
+ 'ia': 'ina',
+ 'id': 'ind',
+ 'in': 'ind', # Replaced by id in 1989 revision
+ 'ie': 'ile',
+ 'ig': 'ibo',
+ 'ii': 'iii',
+ 'ik': 'ipk',
+ 'io': 'ido',
+ 'is': 'isl',
+ 'it': 'ita',
+ 'iu': 'iku',
+ 'ja': 'jpn',
+ 'jv': 'jav',
+ 'ka': 'kat',
+ 'kg': 'kon',
+ 'ki': 'kik',
+ 'kj': 'kua',
+ 'kk': 'kaz',
+ 'kl': 'kal',
+ 'km': 'khm',
+ 'kn': 'kan',
+ 'ko': 'kor',
+ 'kr': 'kau',
+ 'ks': 'kas',
+ 'ku': 'kur',
+ 'kv': 'kom',
+ 'kw': 'cor',
+ 'ky': 'kir',
+ 'la': 'lat',
+ 'lb': 'ltz',
+ 'lg': 'lug',
+ 'li': 'lim',
+ 'ln': 'lin',
+ 'lo': 'lao',
+ 'lt': 'lit',
+ 'lu': 'lub',
+ 'lv': 'lav',
+ 'mg': 'mlg',
+ 'mh': 'mah',
+ 'mi': 'mri',
+ 'mk': 'mkd',
+ 'ml': 'mal',
+ 'mn': 'mon',
+ 'mr': 'mar',
+ 'ms': 'msa',
+ 'mt': 'mlt',
+ 'my': 'mya',
+ 'na': 'nau',
+ 'nb': 'nob',
+ 'nd': 'nde',
+ 'ne': 'nep',
+ 'ng': 'ndo',
+ 'nl': 'nld',
+ 'nn': 'nno',
+ 'no': 'nor',
+ 'nr': 'nbl',
+ 'nv': 'nav',
+ 'ny': 'nya',
+ 'oc': 'oci',
+ 'oj': 'oji',
+ 'om': 'orm',
+ 'or': 'ori',
+ 'os': 'oss',
+ 'pa': 'pan',
+ 'pi': 'pli',
+ 'pl': 'pol',
+ 'ps': 'pus',
+ 'pt': 'por',
+ 'qu': 'que',
+ 'rm': 'roh',
+ 'rn': 'run',
+ 'ro': 'ron',
+ 'ru': 'rus',
+ 'rw': 'kin',
+ 'sa': 'san',
+ 'sc': 'srd',
+ 'sd': 'snd',
+ 'se': 'sme',
+ 'sg': 'sag',
+ 'si': 'sin',
+ 'sk': 'slk',
+ 'sl': 'slv',
+ 'sm': 'smo',
+ 'sn': 'sna',
+ 'so': 'som',
+ 'sq': 'sqi',
+ 'sr': 'srp',
+ 'ss': 'ssw',
+ 'st': 'sot',
+ 'su': 'sun',
+ 'sv': 'swe',
+ 'sw': 'swa',
+ 'ta': 'tam',
+ 'te': 'tel',
+ 'tg': 'tgk',
+ 'th': 'tha',
+ 'ti': 'tir',
+ 'tk': 'tuk',
+ 'tl': 'tgl',
+ 'tn': 'tsn',
+ 'to': 'ton',
+ 'tr': 'tur',
+ 'ts': 'tso',
+ 'tt': 'tat',
+ 'tw': 'twi',
+ 'ty': 'tah',
+ 'ug': 'uig',
+ 'uk': 'ukr',
+ 'ur': 'urd',
+ 'uz': 'uzb',
+ 've': 'ven',
+ 'vi': 'vie',
+ 'vo': 'vol',
+ 'wa': 'wln',
+ 'wo': 'wol',
+ 'xh': 'xho',
+ 'yi': 'yid',
+ 'ji': 'yid', # Replaced by yi in 1989 revision
+ 'yo': 'yor',
+ 'za': 'zha',
+ 'zh': 'zho',
+ 'zu': 'zul',
+ }
+
+ @classmethod
+ def short2long(cls, code):
+ """Convert language code from ISO 639-1 to ISO 639-2/T"""
+ return cls._lang_map.get(code[:2])
+
+ @classmethod
+ def long2short(cls, code):
+ """Convert language code from ISO 639-2/T to ISO 639-1"""
+ for short_name, long_name in cls._lang_map.items():
+ if long_name == code:
+ return short_name
+
+
+class ISO3166Utils(object):
+ # From http://data.okfn.org/data/core/country-list
+ _country_map = {
+ 'AF': 'Afghanistan',
+ 'AX': 'Åland Islands',
+ 'AL': 'Albania',
+ 'DZ': 'Algeria',
+ 'AS': 'American Samoa',
+ 'AD': 'Andorra',
+ 'AO': 'Angola',
+ 'AI': 'Anguilla',
+ 'AQ': 'Antarctica',
+ 'AG': 'Antigua and Barbuda',
+ 'AR': 'Argentina',
+ 'AM': 'Armenia',
+ 'AW': 'Aruba',
+ 'AU': 'Australia',
+ 'AT': 'Austria',
+ 'AZ': 'Azerbaijan',
+ 'BS': 'Bahamas',
+ 'BH': 'Bahrain',
+ 'BD': 'Bangladesh',
+ 'BB': 'Barbados',
+ 'BY': 'Belarus',
+ 'BE': 'Belgium',
+ 'BZ': 'Belize',
+ 'BJ': 'Benin',
+ 'BM': 'Bermuda',
+ 'BT': 'Bhutan',
+ 'BO': 'Bolivia, Plurinational State of',
+ 'BQ': 'Bonaire, Sint Eustatius and Saba',
+ 'BA': 'Bosnia and Herzegovina',
+ 'BW': 'Botswana',
+ 'BV': 'Bouvet Island',
+ 'BR': 'Brazil',
+ 'IO': 'British Indian Ocean Territory',
+ 'BN': 'Brunei Darussalam',
+ 'BG': 'Bulgaria',
+ 'BF': 'Burkina Faso',
+ 'BI': 'Burundi',
+ 'KH': 'Cambodia',
+ 'CM': 'Cameroon',
+ 'CA': 'Canada',
+ 'CV': 'Cape Verde',
+ 'KY': 'Cayman Islands',
+ 'CF': 'Central African Republic',
+ 'TD': 'Chad',
+ 'CL': 'Chile',
+ 'CN': 'China',
+ 'CX': 'Christmas Island',
+ 'CC': 'Cocos (Keeling) Islands',
+ 'CO': 'Colombia',
+ 'KM': 'Comoros',
+ 'CG': 'Congo',
+ 'CD': 'Congo, the Democratic Republic of the',
+ 'CK': 'Cook Islands',
+ 'CR': 'Costa Rica',
+ 'CI': 'Côte d\'Ivoire',
+ 'HR': 'Croatia',
+ 'CU': 'Cuba',
+ 'CW': 'Curaçao',
+ 'CY': 'Cyprus',
+ 'CZ': 'Czech Republic',
+ 'DK': 'Denmark',
+ 'DJ': 'Djibouti',
+ 'DM': 'Dominica',
+ 'DO': 'Dominican Republic',
+ 'EC': 'Ecuador',
+ 'EG': 'Egypt',
+ 'SV': 'El Salvador',
+ 'GQ': 'Equatorial Guinea',
+ 'ER': 'Eritrea',
+ 'EE': 'Estonia',
+ 'ET': 'Ethiopia',
+ 'FK': 'Falkland Islands (Malvinas)',
+ 'FO': 'Faroe Islands',
+ 'FJ': 'Fiji',
+ 'FI': 'Finland',
+ 'FR': 'France',
+ 'GF': 'French Guiana',
+ 'PF': 'French Polynesia',
+ 'TF': 'French Southern Territories',
+ 'GA': 'Gabon',
+ 'GM': 'Gambia',
+ 'GE': 'Georgia',
+ 'DE': 'Germany',
+ 'GH': 'Ghana',
+ 'GI': 'Gibraltar',
+ 'GR': 'Greece',
+ 'GL': 'Greenland',
+ 'GD': 'Grenada',
+ 'GP': 'Guadeloupe',
+ 'GU': 'Guam',
+ 'GT': 'Guatemala',
+ 'GG': 'Guernsey',
+ 'GN': 'Guinea',
+ 'GW': 'Guinea-Bissau',
+ 'GY': 'Guyana',
+ 'HT': 'Haiti',
+ 'HM': 'Heard Island and McDonald Islands',
+ 'VA': 'Holy See (Vatican City State)',
+ 'HN': 'Honduras',
+ 'HK': 'Hong Kong',
+ 'HU': 'Hungary',
+ 'IS': 'Iceland',
+ 'IN': 'India',
+ 'ID': 'Indonesia',
+ 'IR': 'Iran, Islamic Republic of',
+ 'IQ': 'Iraq',
+ 'IE': 'Ireland',
+ 'IM': 'Isle of Man',
+ 'IL': 'Israel',
+ 'IT': 'Italy',
+ 'JM': 'Jamaica',
+ 'JP': 'Japan',
+ 'JE': 'Jersey',
+ 'JO': 'Jordan',
+ 'KZ': 'Kazakhstan',
+ 'KE': 'Kenya',
+ 'KI': 'Kiribati',
+ 'KP': 'Korea, Democratic People\'s Republic of',
+ 'KR': 'Korea, Republic of',
+ 'KW': 'Kuwait',
+ 'KG': 'Kyrgyzstan',
+ 'LA': 'Lao People\'s Democratic Republic',
+ 'LV': 'Latvia',
+ 'LB': 'Lebanon',
+ 'LS': 'Lesotho',
+ 'LR': 'Liberia',
+ 'LY': 'Libya',
+ 'LI': 'Liechtenstein',
+ 'LT': 'Lithuania',
+ 'LU': 'Luxembourg',
+ 'MO': 'Macao',
+ 'MK': 'Macedonia, the Former Yugoslav Republic of',
+ 'MG': 'Madagascar',
+ 'MW': 'Malawi',
+ 'MY': 'Malaysia',
+ 'MV': 'Maldives',
+ 'ML': 'Mali',
+ 'MT': 'Malta',
+ 'MH': 'Marshall Islands',
+ 'MQ': 'Martinique',
+ 'MR': 'Mauritania',
+ 'MU': 'Mauritius',
+ 'YT': 'Mayotte',
+ 'MX': 'Mexico',
+ 'FM': 'Micronesia, Federated States of',
+ 'MD': 'Moldova, Republic of',
+ 'MC': 'Monaco',
+ 'MN': 'Mongolia',
+ 'ME': 'Montenegro',
+ 'MS': 'Montserrat',
+ 'MA': 'Morocco',
+ 'MZ': 'Mozambique',
+ 'MM': 'Myanmar',
+ 'NA': 'Namibia',
+ 'NR': 'Nauru',
+ 'NP': 'Nepal',
+ 'NL': 'Netherlands',
+ 'NC': 'New Caledonia',
+ 'NZ': 'New Zealand',
+ 'NI': 'Nicaragua',
+ 'NE': 'Niger',
+ 'NG': 'Nigeria',
+ 'NU': 'Niue',
+ 'NF': 'Norfolk Island',
+ 'MP': 'Northern Mariana Islands',
+ 'NO': 'Norway',
+ 'OM': 'Oman',
+ 'PK': 'Pakistan',
+ 'PW': 'Palau',
+ 'PS': 'Palestine, State of',
+ 'PA': 'Panama',
+ 'PG': 'Papua New Guinea',
+ 'PY': 'Paraguay',
+ 'PE': 'Peru',
+ 'PH': 'Philippines',
+ 'PN': 'Pitcairn',
+ 'PL': 'Poland',
+ 'PT': 'Portugal',
+ 'PR': 'Puerto Rico',
+ 'QA': 'Qatar',
+ 'RE': 'Réunion',
+ 'RO': 'Romania',
+ 'RU': 'Russian Federation',
+ 'RW': 'Rwanda',
+ 'BL': 'Saint Barthélemy',
+ 'SH': 'Saint Helena, Ascension and Tristan da Cunha',
+ 'KN': 'Saint Kitts and Nevis',
+ 'LC': 'Saint Lucia',
+ 'MF': 'Saint Martin (French part)',
+ 'PM': 'Saint Pierre and Miquelon',
+ 'VC': 'Saint Vincent and the Grenadines',
+ 'WS': 'Samoa',
+ 'SM': 'San Marino',
+ 'ST': 'Sao Tome and Principe',
+ 'SA': 'Saudi Arabia',
+ 'SN': 'Senegal',
+ 'RS': 'Serbia',
+ 'SC': 'Seychelles',
+ 'SL': 'Sierra Leone',
+ 'SG': 'Singapore',
+ 'SX': 'Sint Maarten (Dutch part)',
+ 'SK': 'Slovakia',
+ 'SI': 'Slovenia',
+ 'SB': 'Solomon Islands',
+ 'SO': 'Somalia',
+ 'ZA': 'South Africa',
+ 'GS': 'South Georgia and the South Sandwich Islands',
+ 'SS': 'South Sudan',
+ 'ES': 'Spain',
+ 'LK': 'Sri Lanka',
+ 'SD': 'Sudan',
+ 'SR': 'Suriname',
+ 'SJ': 'Svalbard and Jan Mayen',
+ 'SZ': 'Swaziland',
+ 'SE': 'Sweden',
+ 'CH': 'Switzerland',
+ 'SY': 'Syrian Arab Republic',
+ 'TW': 'Taiwan, Province of China',
+ 'TJ': 'Tajikistan',
+ 'TZ': 'Tanzania, United Republic of',
+ 'TH': 'Thailand',
+ 'TL': 'Timor-Leste',
+ 'TG': 'Togo',
+ 'TK': 'Tokelau',
+ 'TO': 'Tonga',
+ 'TT': 'Trinidad and Tobago',
+ 'TN': 'Tunisia',
+ 'TR': 'Turkey',
+ 'TM': 'Turkmenistan',
+ 'TC': 'Turks and Caicos Islands',
+ 'TV': 'Tuvalu',
+ 'UG': 'Uganda',
+ 'UA': 'Ukraine',
+ 'AE': 'United Arab Emirates',
+ 'GB': 'United Kingdom',
+ 'US': 'United States',
+ 'UM': 'United States Minor Outlying Islands',
+ 'UY': 'Uruguay',
+ 'UZ': 'Uzbekistan',
+ 'VU': 'Vanuatu',
+ 'VE': 'Venezuela, Bolivarian Republic of',
+ 'VN': 'Viet Nam',
+ 'VG': 'Virgin Islands, British',
+ 'VI': 'Virgin Islands, U.S.',
+ 'WF': 'Wallis and Futuna',
+ 'EH': 'Western Sahara',
+ 'YE': 'Yemen',
+ 'ZM': 'Zambia',
+ 'ZW': 'Zimbabwe',
+ }
+
+ @classmethod
+ def short2full(cls, code):
+ """Convert an ISO 3166-2 country code to the corresponding full name"""
+ return cls._country_map.get(code.upper())
+
+
+class GeoUtils(object):
+ # Major IPv4 address blocks per country
+ _country_ip_map = {
+ 'AD': '46.172.224.0/19',
+ 'AE': '94.200.0.0/13',
+ 'AF': '149.54.0.0/17',
+ 'AG': '209.59.64.0/18',
+ 'AI': '204.14.248.0/21',
+ 'AL': '46.99.0.0/16',
+ 'AM': '46.70.0.0/15',
+ 'AO': '105.168.0.0/13',
+ 'AP': '182.50.184.0/21',
+ 'AQ': '23.154.160.0/24',
+ 'AR': '181.0.0.0/12',
+ 'AS': '202.70.112.0/20',
+ 'AT': '77.116.0.0/14',
+ 'AU': '1.128.0.0/11',
+ 'AW': '181.41.0.0/18',
+ 'AX': '185.217.4.0/22',
+ 'AZ': '5.197.0.0/16',
+ 'BA': '31.176.128.0/17',
+ 'BB': '65.48.128.0/17',
+ 'BD': '114.130.0.0/16',
+ 'BE': '57.0.0.0/8',
+ 'BF': '102.178.0.0/15',
+ 'BG': '95.42.0.0/15',
+ 'BH': '37.131.0.0/17',
+ 'BI': '154.117.192.0/18',
+ 'BJ': '137.255.0.0/16',
+ 'BL': '185.212.72.0/23',
+ 'BM': '196.12.64.0/18',
+ 'BN': '156.31.0.0/16',
+ 'BO': '161.56.0.0/16',
+ 'BQ': '161.0.80.0/20',
+ 'BR': '191.128.0.0/12',
+ 'BS': '24.51.64.0/18',
+ 'BT': '119.2.96.0/19',
+ 'BW': '168.167.0.0/16',
+ 'BY': '178.120.0.0/13',
+ 'BZ': '179.42.192.0/18',
+ 'CA': '99.224.0.0/11',
+ 'CD': '41.243.0.0/16',
+ 'CF': '197.242.176.0/21',
+ 'CG': '160.113.0.0/16',
+ 'CH': '85.0.0.0/13',
+ 'CI': '102.136.0.0/14',
+ 'CK': '202.65.32.0/19',
+ 'CL': '152.172.0.0/14',
+ 'CM': '102.244.0.0/14',
+ 'CN': '36.128.0.0/10',
+ 'CO': '181.240.0.0/12',
+ 'CR': '201.192.0.0/12',
+ 'CU': '152.206.0.0/15',
+ 'CV': '165.90.96.0/19',
+ 'CW': '190.88.128.0/17',
+ 'CY': '31.153.0.0/16',
+ 'CZ': '88.100.0.0/14',
+ 'DE': '53.0.0.0/8',
+ 'DJ': '197.241.0.0/17',
+ 'DK': '87.48.0.0/12',
+ 'DM': '192.243.48.0/20',
+ 'DO': '152.166.0.0/15',
+ 'DZ': '41.96.0.0/12',
+ 'EC': '186.68.0.0/15',
+ 'EE': '90.190.0.0/15',
+ 'EG': '156.160.0.0/11',
+ 'ER': '196.200.96.0/20',
+ 'ES': '88.0.0.0/11',
+ 'ET': '196.188.0.0/14',
+ 'EU': '2.16.0.0/13',
+ 'FI': '91.152.0.0/13',
+ 'FJ': '144.120.0.0/16',
+ 'FK': '80.73.208.0/21',
+ 'FM': '119.252.112.0/20',
+ 'FO': '88.85.32.0/19',
+ 'FR': '90.0.0.0/9',
+ 'GA': '41.158.0.0/15',
+ 'GB': '25.0.0.0/8',
+ 'GD': '74.122.88.0/21',
+ 'GE': '31.146.0.0/16',
+ 'GF': '161.22.64.0/18',
+ 'GG': '62.68.160.0/19',
+ 'GH': '154.160.0.0/12',
+ 'GI': '95.164.0.0/16',
+ 'GL': '88.83.0.0/19',
+ 'GM': '160.182.0.0/15',
+ 'GN': '197.149.192.0/18',
+ 'GP': '104.250.0.0/19',
+ 'GQ': '105.235.224.0/20',
+ 'GR': '94.64.0.0/13',
+ 'GT': '168.234.0.0/16',
+ 'GU': '168.123.0.0/16',
+ 'GW': '197.214.80.0/20',
+ 'GY': '181.41.64.0/18',
+ 'HK': '113.252.0.0/14',
+ 'HN': '181.210.0.0/16',
+ 'HR': '93.136.0.0/13',
+ 'HT': '148.102.128.0/17',
+ 'HU': '84.0.0.0/14',
+ 'ID': '39.192.0.0/10',
+ 'IE': '87.32.0.0/12',
+ 'IL': '79.176.0.0/13',
+ 'IM': '5.62.80.0/20',
+ 'IN': '117.192.0.0/10',
+ 'IO': '203.83.48.0/21',
+ 'IQ': '37.236.0.0/14',
+ 'IR': '2.176.0.0/12',
+ 'IS': '82.221.0.0/16',
+ 'IT': '79.0.0.0/10',
+ 'JE': '87.244.64.0/18',
+ 'JM': '72.27.0.0/17',
+ 'JO': '176.29.0.0/16',
+ 'JP': '133.0.0.0/8',
+ 'KE': '105.48.0.0/12',
+ 'KG': '158.181.128.0/17',
+ 'KH': '36.37.128.0/17',
+ 'KI': '103.25.140.0/22',
+ 'KM': '197.255.224.0/20',
+ 'KN': '198.167.192.0/19',
+ 'KP': '175.45.176.0/22',
+ 'KR': '175.192.0.0/10',
+ 'KW': '37.36.0.0/14',
+ 'KY': '64.96.0.0/15',
+ 'KZ': '2.72.0.0/13',
+ 'LA': '115.84.64.0/18',
+ 'LB': '178.135.0.0/16',
+ 'LC': '24.92.144.0/20',
+ 'LI': '82.117.0.0/19',
+ 'LK': '112.134.0.0/15',
+ 'LR': '102.183.0.0/16',
+ 'LS': '129.232.0.0/17',
+ 'LT': '78.56.0.0/13',
+ 'LU': '188.42.0.0/16',
+ 'LV': '46.109.0.0/16',
+ 'LY': '41.252.0.0/14',
+ 'MA': '105.128.0.0/11',
+ 'MC': '88.209.64.0/18',
+ 'MD': '37.246.0.0/16',
+ 'ME': '178.175.0.0/17',
+ 'MF': '74.112.232.0/21',
+ 'MG': '154.126.0.0/17',
+ 'MH': '117.103.88.0/21',
+ 'MK': '77.28.0.0/15',
+ 'ML': '154.118.128.0/18',
+ 'MM': '37.111.0.0/17',
+ 'MN': '49.0.128.0/17',
+ 'MO': '60.246.0.0/16',
+ 'MP': '202.88.64.0/20',
+ 'MQ': '109.203.224.0/19',
+ 'MR': '41.188.64.0/18',
+ 'MS': '208.90.112.0/22',
+ 'MT': '46.11.0.0/16',
+ 'MU': '105.16.0.0/12',
+ 'MV': '27.114.128.0/18',
+ 'MW': '102.70.0.0/15',
+ 'MX': '187.192.0.0/11',
+ 'MY': '175.136.0.0/13',
+ 'MZ': '197.218.0.0/15',
+ 'NA': '41.182.0.0/16',
+ 'NC': '101.101.0.0/18',
+ 'NE': '197.214.0.0/18',
+ 'NF': '203.17.240.0/22',
+ 'NG': '105.112.0.0/12',
+ 'NI': '186.76.0.0/15',
+ 'NL': '145.96.0.0/11',
+ 'NO': '84.208.0.0/13',
+ 'NP': '36.252.0.0/15',
+ 'NR': '203.98.224.0/19',
+ 'NU': '49.156.48.0/22',
+ 'NZ': '49.224.0.0/14',
+ 'OM': '5.36.0.0/15',
+ 'PA': '186.72.0.0/15',
+ 'PE': '186.160.0.0/14',
+ 'PF': '123.50.64.0/18',
+ 'PG': '124.240.192.0/19',
+ 'PH': '49.144.0.0/13',
+ 'PK': '39.32.0.0/11',
+ 'PL': '83.0.0.0/11',
+ 'PM': '70.36.0.0/20',
+ 'PR': '66.50.0.0/16',
+ 'PS': '188.161.0.0/16',
+ 'PT': '85.240.0.0/13',
+ 'PW': '202.124.224.0/20',
+ 'PY': '181.120.0.0/14',
+ 'QA': '37.210.0.0/15',
+ 'RE': '102.35.0.0/16',
+ 'RO': '79.112.0.0/13',
+ 'RS': '93.86.0.0/15',
+ 'RU': '5.136.0.0/13',
+ 'RW': '41.186.0.0/16',
+ 'SA': '188.48.0.0/13',
+ 'SB': '202.1.160.0/19',
+ 'SC': '154.192.0.0/11',
+ 'SD': '102.120.0.0/13',
+ 'SE': '78.64.0.0/12',
+ 'SG': '8.128.0.0/10',
+ 'SI': '188.196.0.0/14',
+ 'SK': '78.98.0.0/15',
+ 'SL': '102.143.0.0/17',
+ 'SM': '89.186.32.0/19',
+ 'SN': '41.82.0.0/15',
+ 'SO': '154.115.192.0/18',
+ 'SR': '186.179.128.0/17',
+ 'SS': '105.235.208.0/21',
+ 'ST': '197.159.160.0/19',
+ 'SV': '168.243.0.0/16',
+ 'SX': '190.102.0.0/20',
+ 'SY': '5.0.0.0/16',
+ 'SZ': '41.84.224.0/19',
+ 'TC': '65.255.48.0/20',
+ 'TD': '154.68.128.0/19',
+ 'TG': '196.168.0.0/14',
+ 'TH': '171.96.0.0/13',
+ 'TJ': '85.9.128.0/18',
+ 'TK': '27.96.24.0/21',
+ 'TL': '180.189.160.0/20',
+ 'TM': '95.85.96.0/19',
+ 'TN': '197.0.0.0/11',
+ 'TO': '175.176.144.0/21',
+ 'TR': '78.160.0.0/11',
+ 'TT': '186.44.0.0/15',
+ 'TV': '202.2.96.0/19',
+ 'TW': '120.96.0.0/11',
+ 'TZ': '156.156.0.0/14',
+ 'UA': '37.52.0.0/14',
+ 'UG': '102.80.0.0/13',
+ 'US': '6.0.0.0/8',
+ 'UY': '167.56.0.0/13',
+ 'UZ': '84.54.64.0/18',
+ 'VA': '212.77.0.0/19',
+ 'VC': '207.191.240.0/21',
+ 'VE': '186.88.0.0/13',
+ 'VG': '66.81.192.0/20',
+ 'VI': '146.226.0.0/16',
+ 'VN': '14.160.0.0/11',
+ 'VU': '202.80.32.0/20',
+ 'WF': '117.20.32.0/21',
+ 'WS': '202.4.32.0/19',
+ 'YE': '134.35.0.0/16',
+ 'YT': '41.242.116.0/22',
+ 'ZA': '41.0.0.0/11',
+ 'ZM': '102.144.0.0/13',
+ 'ZW': '102.177.192.0/18',
+ }
+
+ @classmethod
+ def random_ipv4(cls, code_or_block):
+ if len(code_or_block) == 2:
+ block = cls._country_ip_map.get(code_or_block.upper())
+ if not block:
+ return None
+ else:
+ block = code_or_block
+ addr, preflen = block.split('/')
+ addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
+ addr_max = addr_min | (0xffffffff >> int(preflen))
+ return compat_str(socket.inet_ntoa(
+ compat_struct_pack('!L', random.randint(addr_min, addr_max))))
+
+
+class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
+ def __init__(self, proxies=None):
+ # Set default handlers
+ for type in ('http', 'https'):
+ setattr(self, '%s_open' % type,
+ lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
+ meth(r, proxy, type))
+ compat_urllib_request.ProxyHandler.__init__(self, proxies)
+
+ def proxy_open(self, req, proxy, type):
+ req_proxy = req.headers.get('Ytdl-request-proxy')
+ if req_proxy is not None:
+ proxy = req_proxy
+ del req.headers['Ytdl-request-proxy']
+
+ if proxy == '__noproxy__':
+ return None # No Proxy
+ if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
+ req.add_header('Ytdl-socks-proxy', proxy)
+ # youtube-dlc's http/https handlers take care of wrapping the socket with SOCKS
+ return None
+ return compat_urllib_request.ProxyHandler.proxy_open(
+ self, req, proxy, type)
+
+
+ # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
+ # released into the public domain
+# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
+
+def long_to_bytes(n, blocksize=0):
+ """long_to_bytes(n:long, blocksize:int) : string
+ Convert a long integer to a byte string.
+
+ If optional blocksize is given and greater than zero, pad the front of the
+ byte string with binary zeros so that the length is a multiple of
+ blocksize.
+ """
+ # after much testing, this algorithm was deemed to be the fastest
+ s = b''
+ n = int(n)
+ while n > 0:
+ s = compat_struct_pack('>I', n & 0xffffffff) + s
+ n = n >> 32
+ # strip off leading zeros
+ for i in range(len(s)):
+ if s[i] != b'\000'[0]:
+ break
+ else:
+ # only happens when n == 0
+ s = b'\000'
+ i = 0
+ s = s[i:]
+ # add back some pad bytes. this could be done more efficiently w.r.t. the
+ # de-padding being done above, but sigh...
+ if blocksize > 0 and len(s) % blocksize:
+ s = (blocksize - len(s) % blocksize) * b'\000' + s
+ return s
+
+
+def bytes_to_long(s):
+ """bytes_to_long(string) : long
+ Convert a byte string to a long integer.
+
+ This is (essentially) the inverse of long_to_bytes().
+ """
+ acc = 0
+ length = len(s)
+ if length % 4:
+ extra = (4 - length % 4)
+ s = b'\000' * extra + s
+ length = length + extra
+ for i in range(0, length, 4):
+ acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
+ return acc
+
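+ # The two functions are inverses of each other, e.g.:
+ #
+ #   >>> long_to_bytes(65537)
+ #   b'\x01\x00\x01'
+ #   >>> bytes_to_long(b'\x01\x00\x01')
+ #   65537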
+
+def ohdave_rsa_encrypt(data, exponent, modulus):
+ '''
+ Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
+
+ Input:
+ data: data to encrypt, bytes-like object
+ exponent, modulus: parameter e and N of RSA algorithm, both integer
+ Output: hex string of encrypted data
+
+ Limitation: supports one block encryption only
+ '''
+
+ payload = int(binascii.hexlify(data[::-1]), 16)
+ encrypted = pow(payload, exponent, modulus)
+ return '%x' % encrypted
+
+
+def pkcs1pad(data, length):
+ """
+ Pad input data with the PKCS#1 scheme
+
+ @param {int[]} data input data
+ @param {int} length target length
+ @returns {int[]} padded data
+ """
+ if len(data) > length - 11:
+ raise ValueError('Input data too long for PKCS#1 padding')
+
+ pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
+ return [0, 2] + pseudo_random + [0] + data
+
+
+def encode_base_n(num, n, table=None):
+ FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ if not table:
+ table = FULL_TABLE[:n]
+
+ if n > len(table):
+ raise ValueError('base %d exceeds table length %d' % (n, len(table)))
+
+ if num == 0:
+ return table[0]
+
+ ret = ''
+ while num:
+ ret = table[num % n] + ret
+ num = num // n
+ return ret
+
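+ # For example, with the default 62-character table:
+ #
+ #   >>> encode_base_n(255, 16)
+ #   'ff'
+ #   >>> encode_base_n(61, 62)
+ #   'Z'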
+
+def decode_packed_codes(code):
+ mobj = re.search(PACKED_CODES_RE, code)
+ obfuscated_code, base, count, symbols = mobj.groups()
+ base = int(base)
+ count = int(count)
+ symbols = symbols.split('|')
+ symbol_table = {}
+
+ while count:
+ count -= 1
+ base_n_count = encode_base_n(count, base)
+ symbol_table[base_n_count] = symbols[count] or base_n_count
+
+ return re.sub(
+ r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
+ obfuscated_code)
+
+
+def caesar(s, alphabet, shift):
+ if shift == 0:
+ return s
+ n = len(alphabet)
+ return ''.join(
+ alphabet[(alphabet.index(c) + shift) % n] if c in alphabet else c
+ for c in s)
+
+
+def rot47(s):
+ return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
+
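+ # rot47 is its own inverse, since 47 + 47 = 94, the alphabet length:
+ #
+ #   >>> rot47('AB')
+ #   'pq'
+ #   >>> rot47(rot47('secret'))
+ #   'secret'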
+
+def parse_m3u8_attributes(attrib):
+ info = {}
+ for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
+ if val.startswith('"'):
+ val = val[1:-1]
+ info[key] = val
+ return info
+
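+ # For example, quoted attribute values keep their embedded commas:
+ #
+ #   >>> parse_m3u8_attributes('BANDWIDTH=800000,CODECS="avc1.4d401e,mp4a.40.2"') == {
+ #   ...     'BANDWIDTH': '800000', 'CODECS': 'avc1.4d401e,mp4a.40.2'}
+ #   True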
+
+def urshift(val, n):
+ return val >> n if val >= 0 else (val + 0x100000000) >> n
+
+
+# Based on png2str() written by @gdkchan and improved by @yokrysty
+# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
+def decode_png(png_data):
+ # Reference: https://www.w3.org/TR/PNG/
+ header = png_data[8:]
+
+ if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
+ raise IOError('Not a valid PNG file.')
+
+ int_map = {1: '>B', 2: '>H', 4: '>I'}
+ unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
+
+ chunks = []
+
+ while header:
+ length = unpack_integer(header[:4])
+ header = header[4:]
+
+ chunk_type = header[:4]
+ header = header[4:]
+
+ chunk_data = header[:length]
+ header = header[length:]
+
+ header = header[4:] # Skip CRC
+
+ chunks.append({
+ 'type': chunk_type,
+ 'length': length,
+ 'data': chunk_data
+ })
+
+ ihdr = chunks[0]['data']
+
+ width = unpack_integer(ihdr[:4])
+ height = unpack_integer(ihdr[4:8])
+
+ idat = b''
+
+ for chunk in chunks:
+ if chunk['type'] == b'IDAT':
+ idat += chunk['data']
+
+ if not idat:
+ raise IOError('Unable to read PNG data.')
+
+ decompressed_data = bytearray(zlib.decompress(idat))
+
+ stride = width * 3
+ pixels = []
+
+ def _get_pixel(idx):
+ x = idx % stride
+ y = idx // stride
+ return pixels[y][x]
+
+ for y in range(height):
+ basePos = y * (1 + stride)
+ filter_type = decompressed_data[basePos]
+
+ current_row = []
+
+ pixels.append(current_row)
+
+ for x in range(stride):
+ color = decompressed_data[1 + basePos + x]
+ basex = y * stride + x
+ left = 0
+ up = 0
+
+ if x > 2:
+ left = _get_pixel(basex - 3)
+ if y > 0:
+ up = _get_pixel(basex - stride)
+
+ if filter_type == 1: # Sub
+ color = (color + left) & 0xff
+ elif filter_type == 2: # Up
+ color = (color + up) & 0xff
+ elif filter_type == 3: # Average
+ color = (color + ((left + up) >> 1)) & 0xff
+ elif filter_type == 4: # Paeth
+ a = left
+ b = up
+ c = 0
+
+ if x > 2 and y > 0:
+ c = _get_pixel(basex - stride - 3)
+
+ p = a + b - c
+
+ pa = abs(p - a)
+ pb = abs(p - b)
+ pc = abs(p - c)
+
+ if pa <= pb and pa <= pc:
+ color = (color + a) & 0xff
+ elif pb <= pc:
+ color = (color + b) & 0xff
+ else:
+ color = (color + c) & 0xff
+
+ current_row.append(color)
+
+ return width, height, pixels
+
+
+def write_xattr(path, key, value):
+ # This mess below finds the best xattr tool for the job
+ try:
+ # try the pyxattr module...
+ import xattr
+
+ if hasattr(xattr, 'set'): # pyxattr
+ # Unicode arguments are not supported in python-pyxattr until
+ # version 0.5.0
+ # See https://github.com/ytdl-org/youtube-dl/issues/5498
+ pyxattr_required_version = '0.5.0'
+ if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
+ # TODO: fallback to CLI tools
+ raise XAttrUnavailableError(
+ 'python-pyxattr is detected but is too old. '
+ 'youtube-dlc requires %s or above while your version is %s. '
+ 'Falling back to other xattr implementations' % (
+ pyxattr_required_version, xattr.__version__))
+
+ setxattr = xattr.set
+ else: # xattr
+ setxattr = xattr.setxattr
+
+ try:
+ setxattr(path, key, value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+
+ except ImportError:
+ if compat_os_name == 'nt':
+ # Write xattrs to NTFS Alternate Data Streams:
+ # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
+ assert ':' not in key
+ assert os.path.exists(path)
+
+ ads_fn = path + ':' + key
+ try:
+ with open(ads_fn, 'wb') as f:
+ f.write(value)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ else:
+ user_has_setfattr = check_executable('setfattr', ['--version'])
+ user_has_xattr = check_executable('xattr', ['-h'])
+
+ if user_has_setfattr or user_has_xattr:
+
+ value = value.decode('utf-8')
+ if user_has_setfattr:
+ executable = 'setfattr'
+ opts = ['-n', key, '-v', value]
+ elif user_has_xattr:
+ executable = 'xattr'
+ opts = ['-w', key, value]
+
+ cmd = ([encodeFilename(executable, True)]
+ + [encodeArgument(o) for o in opts]
+ + [encodeFilename(path, True)])
+
+ try:
+ p = subprocess.Popen(
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+ except EnvironmentError as e:
+ raise XAttrMetadataError(e.errno, e.strerror)
+ stdout, stderr = p.communicate()
+ stderr = stderr.decode('utf-8', 'replace')
+ if p.returncode != 0:
+ raise XAttrMetadataError(p.returncode, stderr)
+
+ else:
+ # On Unix, we can't find pyxattr, setfattr, or xattr.
+ if sys.platform.startswith('linux'):
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'pyxattr' or 'xattr' "
+ "modules, or the GNU 'attr' package "
+ "(which contains the 'setfattr' tool).")
+ else:
+ raise XAttrUnavailableError(
+ "Couldn't find a tool to set the xattrs. "
+ "Install either the python 'xattr' module, "
+ "or the 'xattr' binary.")
+
+
+def random_birthday(year_field, month_field, day_field):
+ start_date = datetime.date(1950, 1, 1)
+ end_date = datetime.date(1995, 12, 31)
+ offset = random.randint(0, (end_date - start_date).days)
+ random_date = start_date + datetime.timedelta(offset)
+ return {
+ year_field: str(random_date.year),
+ month_field: str(random_date.month),
+ day_field: str(random_date.day),
+ }
diff --git a/youtube_dlc/version.py b/youtube_dlc/version.py
new file mode 100644
index 000000000..9cabf84e2
--- /dev/null
+++ b/youtube_dlc/version.py
@@ -0,0 +1,3 @@
+from __future__ import unicode_literals
+
+__version__ = '2020.09.03.1'